suppressPackageStartupMessages({
library(Seurat)
library(ggplot2)
library(dplyr)
library(ggthemes)
library(uwot)
library(symphony)
library(pheatmap)
library(harmony)
library(readxl)
library(presto)
library(anndata)
library(singlecellmethods)
library(rlang)
})
fig.size <- function(h, w) {
options(repr.plot.height = h, repr.plot.width = w)
}
data.path <- "/data/brennerlab/AMP_Phase_2/Data/Processed_single_cell_data/processed_output_04-11-2023/"
T.metadata.p <- paste0(data.path, "T_reference.rds")
T.uwot.p <- paste0(data.path, "T_uwot_model")
all.qc.p <- paste0(data.path, "qc_mRNA_314011cells_log_normalized_matrix.rds")
all.qc.prot.p <- paste0(data.path, "qc_protein_CLR_normalized_filtered_matrix.rds")
all.raw.counts <- paste0(data.path, "raw_mRNA_count_matrix.rds")
all.raw.counts.prot <- paste0(data.path, "raw_protein_count_matrix.rds")
CTAP.path <- paste0(data.path, "CTAP_donor_mapping.xlsx")
donor.meta.data <- "/data/brennerlab/Shani/projects/AMP_Phase_2/treatment assigned metadata_clin_donor_singlecell.csv"
set.seed(1)
amp_ref <- readRDS(T.metadata.p)
amp_meta <- amp_ref$meta_data
str(amp_ref, 1)
List of 11 $ meta_data :'data.frame': 94046 obs. of 13 variables: $ vargenes : tibble [5,179 × 3] (S3: tbl_df/tbl/data.frame) $ loadings : num [1:5179, 1:20] 0.013549 0.001822 -0.000413 0.003793 -0.003837 ... ..- attr(*, "dimnames")=List of 2 $ R : num [1:100, 1:94046] 3.75e-09 1.57e-07 1.54e-06 8.96e-06 3.88e-09 ... $ Z_orig : num [1:20, 1:94046] -0.48237 -1.13399 0.85466 0.00182 -0.08951 ... $ Z_corr : num [1:20, 1:94046] -0.689 -0.769 0.98 -0.504 0.207 ... $ betas : num [1:82, 1:20, 1:100] 1.084319 0.318315 -0.010745 -0.007796 0.000507 ... $ centroids : num [1:20, 1:100] 0.372 -0.101 -0.162 0.32 -0.188 ... $ cache :List of 2 $ umap :List of 1 $ save_uwot_path: chr "/data/srlab2/anathan/AMP/data/2020.11.25_Phase2RA_Tcells_cca_400_raw_postHarmony_Tfilter2_refumap.rds"
amp_ref %>% summary
Length Class Mode meta_data 13 data.frame list vargenes 3 tbl_df list loadings 103580 -none- numeric R 9404600 -none- numeric Z_orig 1880920 -none- numeric Z_corr 1880920 -none- numeric betas 164000 -none- numeric centroids 2000 -none- numeric cache 2 -none- list umap 1 -none- list save_uwot_path 1 -none- character
fig.size(12,10)
amp_meta %>% with(table(cluster_name)) %>% as.data.frame() %>%
ggplot() +
geom_col(aes(Freq, cluster_name)) + theme_bw(base_size = 24)
amp_meta$cluster_name%>% table()
.
T-0: CD4+ IL7R+ memory T-1: CD4+ CD161+ memory
9902 8520
T-10: CD4+ OX40+NR3C1+ T-11: CD4+ CD146+ memory
2553 1867
T-12: CD4+ GNLY+ T-13: CD8+ GZMK/B+ memory
1604 8693
T-14: CD8+ GZMK+ memory T-15: CD8+ GZMB+/TEMRA
6870 4723
T-16: CD8+ CD45ROlow/naive T-17: CD8+ activated/NK-like
4554 692
T-18: Proliferating T-19: MT-high (low quality)
1952 1702
T-2: CD4+ IL7R+CCR5+ memory T-20: CD38+
6595 748
T-21: Innate-like T-22: Vdelta1
1473 3373
T-23: Vdelta2 T-3: CD4+ Tfh/Tph
1161 6118
T-4: CD4+ naive T-5: CD4+ GZMK+ memory
4947 4485
T-6: CD4+ memory T-7: CD4+ Tph
3167 2859
T-8: CD4+ CD25-high Treg T-9: CD4+ CD25-low Treg
2841 2647
2841+2647
amp_ref$save_uwot_path <- T.uwot.p
amp_ref$meta_data%>% head
| cell | sample | cluster | cell_type | donor | fibro | Tfilter | nUMI | nGene | percent_mito | Tfilter2 | cluster_number | cluster_name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <chr> | <chr> | <chr> | <chr> | <lgl> | <lgl> | <dbl> | <int> | <dbl> | <lgl> | <chr> | <chr> | |
| 201003 | BRI-399_AAACGAATCTGCATGA | BRI-399 | T-6: CD4+ memory | T cell | BRI-399 | TRUE | FALSE | 7560 | 2157 | 0.06521164 | TRUE | T-6 | T-6: CD4+ memory |
| 301006 | BRI-399_AAACGCTTCCTTGACC | BRI-399 | T-5: CD4+ GZMK+ memory | T cell | BRI-399 | TRUE | FALSE | 2563 | 1171 | 0.16269996 | TRUE | T-5 | T-5: CD4+ GZMK+ memory |
| 401005 | BRI-399_AAAGGGCAGCCGGAAT | BRI-399 | T-8: CD4+ CD25-high Treg | T cell | BRI-399 | TRUE | FALSE | 4922 | 1741 | 0.07334417 | TRUE | T-8 | T-8: CD4+ CD25-high Treg |
| 431002 | BRI-399_AAAGGGCCACTATGTG | BRI-399 | T-22: Vdelta1 | T cell | BRI-399 | TRUE | FALSE | 3968 | 1619 | 0.06880040 | TRUE | T-22 | T-22: Vdelta1 |
| 531002 | BRI-399_AAAGGTAGTGCAGGAT | BRI-399 | T-8: CD4+ CD25-high Treg | T cell | BRI-399 | TRUE | FALSE | 4101 | 1492 | 0.09022190 | TRUE | T-8 | T-8: CD4+ CD25-high Treg |
| 691002 | BRI-399_AAAGTGAGTTGTGGCC | BRI-399 | T-14: CD8+ GZMK+ memory | T cell | BRI-399 | TRUE | FALSE | 3390 | 1417 | 0.02300885 | TRUE | T-14 | T-14: CD8+ GZMK+ memory |
# load counts and qc counts
amp_counts <- readRDS(all.raw.counts)
dim(amp_counts)
dim(amp_meta)
#filter counts to to include T cells only by metadata
cells <- amp_meta$cell
amp_counts <- amp_counts[,cells]
# load counts and qc counts
protein_counts <- readRDS(all.raw.counts.prot)
dim(protein_counts)
#filter counts to to include T cells only by metadata
protein_counts <- protein_counts[,cells]
meta <- amp_meta %>% tibble::remove_rownames()%>% tibble::column_to_rownames(var = "cell")
amp <- CreateSeuratObject(counts = amp_counts, meta.data = meta, project = "AMPII")
amp[["ADT"]] <- CreateAssayObject(counts = protein_counts)
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
donor.meta <- read.csv(donor.meta.data)
merged.meta <- amp@meta.data%>% left_join(donor.meta, by =join_by("orig.ident" == "mRNA_run"))
head(merged.meta, 3)
| orig.ident | nCount_RNA | nFeature_RNA | sample | cluster | cell_type | donor | fibro | Tfilter | nUMI | ⋯ | sex | age | CDAI | disease_duration | tissue_type | krenn_inflammation | single_cell_tech | cell_count_to_10x | protein_run | atac_run | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <dbl> | <int> | <chr> | <chr> | <chr> | <chr> | <lgl> | <lgl> | <dbl> | ⋯ | <chr> | <int> | <chr> | <dbl> | <chr> | <dbl> | <chr> | <chr> | <chr> | <chr> | |
| 1 | BRI-399 | 7560 | 2157 | BRI-399 | T-6: CD4+ memory | T cell | BRI-399 | TRUE | FALSE | 7560 | ⋯ | Female | 70 | NA | Arthroplasty | NA | CITEseq + flow/bulk + re-freeze | 15,000 | BRI-400 | BRI-448 | |
| 2 | BRI-399 | 2563 | 1171 | BRI-399 | T-5: CD4+ GZMK+ memory | T cell | BRI-399 | TRUE | FALSE | 2563 | ⋯ | Female | 70 | NA | Arthroplasty | NA | CITEseq + flow/bulk + re-freeze | 15,000 | BRI-400 | BRI-448 | |
| 3 | BRI-399 | 4922 | 1741 | BRI-399 | T-8: CD4+ CD25-high Treg | T cell | BRI-399 | TRUE | FALSE | 4922 | ⋯ | Female | 70 | NA | Arthroplasty | NA | CITEseq + flow/bulk + re-freeze | 15,000 | BRI-400 | BRI-448 |
merged.meta$subject_id%>% head
colnames(donor.meta)
colnames(amp@meta.data)
#Add metadata
amp <- AddMetaData(amp, merged.meta$age, "age")
amp <- AddMetaData(amp, merged.meta$sex, "sex")
amp <- AddMetaData(amp, merged.meta$treatment, "treatment")
amp <- AddMetaData(amp, merged.meta$CDAI, "CDAI")
amp <- AddMetaData(amp, merged.meta$disease_duration, "disease.duration")
amp <- AddMetaData(amp, merged.meta$tissue_type, "tissue.type")
amp <- AddMetaData(amp, merged.meta$krenn_inflammation, "krenn")
amp <- AddMetaData(amp, merged.meta$subject_id, "subject_id")
# Add CTAP info
ctap.file <- read_xlsx(CTAP.path)
ctap.file%>% head
| subject_id | donor | CTAP |
|---|---|---|
| <chr> | <chr> | <chr> |
| 300-0310 | BRI-405 | E + F + M |
| 300-0309 | BRI-411 | E + F + M |
| 300-0174 | BRI-479 | E + F + M |
| 300-0175 | BRI-525 | E + F + M |
| 300-0529 | BRI-554 | E + F + M |
| 300-0145 | BRI-589 | E + F + M |
donorID <- amp@meta.data%>% select(orig.ident)%>% tibble::rownames_to_column(., "cellID")%>%
left_join(ctap.file, by = join_by(orig.ident == donor))%>%
tibble::column_to_rownames(., "cellID") %>% .[rownames(amp@meta.data),]
amp <- AddMetaData(amp, donorID$CTAP, "CTAP")
fig.size(5,7)
sum.donor <- amp@meta.data %>% select(donor, CTAP, treatment, sex) %>% distinct()
pt <- table(sum.donor$CTAP[sum.donor$treatment != "OA"], sum.donor$sex[sum.donor$treatment != "OA"])
pt
pt %>% data.frame()%>% setNames(c("CTAP", "SEX", "Freq")) %>% mutate(per = Freq / ifelse(SEX == "Female", sum(Freq[SEX == "Female"]), sum(Freq[SEX == "Male"])))%>%
ggplot(aes(x = CTAP, y = per, fill = SEX)) + theme_bw(base_size = 20)-> pl
pl + geom_col(position = "stack")
pl + geom_col(position = "fill")
Female Male
E + F + M 6 1
F 7 4
M 13 5
T + B 11 3
T + F 6 2
T + M 9 3
fig.size(5,20)
VlnPlot(amp, features = "nFeature_RNA", group.by = "cluster", pt.size = 0)
VlnPlot(amp, features = "nCount_RNA", group.by = "cluster", pt.size = 0)
VlnPlot(amp, features = "percent_mito", group.by = "cluster", pt.size = 0)
#RNA
amp <- amp %>%
NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>%
ScaleData()%>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
RunPCA(verbose = F)
# RunBalancedPCA(weight.by='orig.ident')
#protein
amp <- amp %>%
NormalizeData(normalization.method = "CLR", margin = 2, assay = "ADT") %>%
ScaleData(assay = "ADT")
Centering and scaling data matrix Normalizing across cells Centering and scaling data matrix
# add variable gene list
# amp@assays$RNA@var.features <- amp_ref$vargenes$symbol
amp@assays$ADT%>% rownames
a <- amp_ref$umap$embedding
colnames(a) <- c("UMAP_1", "UMAP_2")
rownames(a) <-amp_ref$meta_data$cell
a%>% head
| UMAP_1 | UMAP_2 | |
|---|---|---|
| BRI-399_AAACGAATCTGCATGA | -0.7474533 | -0.7355343 |
| BRI-399_AAACGCTTCCTTGACC | 2.6602657 | 0.7497368 |
| BRI-399_AAAGGGCAGCCGGAAT | -2.1143181 | 2.0444754 |
| BRI-399_AAAGGGCCACTATGTG | 5.5098784 | 1.5855038 |
| BRI-399_AAAGGTAGTGCAGGAT | -1.5639387 | 2.5774951 |
| BRI-399_AAAGTGAGTTGTGGCC | 4.2999895 | -0.9138112 |
amp@reductions$umap <- CreateDimReducObject(embeddings = a, key = "UMAP_", assay= "RNA")
fig.size(8,14)
DimPlot(amp, reduction = "umap", group.by = "cluster")
fig.size(5,18)
FeaturePlot(amp, features = c("FOXP3","IL2RA","IL7R", "MKI67"), ncol = 4)
Idents(amp) <- "cluster_number"
IL2RA <- wilcoxauc(amp, groups_use = c("T-8", "T-9"))
m.show <- IL2RA %>%
group_by(group) %>% filter(padj <0.05 & auc > 0.6) %>%
arrange(group, desc(logFC)) %>%
select(feature, group) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = group, values_from = feature)
IL2RA %>% write.csv("../../analysis/AMP2_RA_tissue/Treg.DEG.IL2RA.csv")
m.show %>% write.csv("../../analysis/AMP2_RA_tissue/Treg.DEG.IL2RA.Passed.csv")
m.show[1:30,]
| row | T-8 | T-9 |
|---|---|---|
| <int> | <chr> | <chr> |
| 1 | IL32 | JUNB |
| 2 | HLA-DRB1 | RPS12 |
| 3 | CTSC | RPS6 |
| 4 | TNFRSF18 | RPS3A |
| 5 | LGALS1 | RPL32 |
| 6 | CD74 | EEF1B2 |
| 7 | CYTOR | RPLP2 |
| 8 | S100A4 | RPS13 |
| 9 | RNF213 | RPS14 |
| 10 | TNFRSF4 | FOS |
| 11 | LGALS3 | RPS8 |
| 12 | HLA-DPB1 | RPL30 |
| 13 | MAF | RPS18 |
| 14 | MYL6 | RPL9 |
| 15 | NEAT1 | RPL13 |
| 16 | CTLA4 | RPS20 |
| 17 | TNFRSF1B | ZFP36L2 |
| 18 | HLA-DPA1 | RPL3 |
| 19 | S100A6 | RPL39 |
| 20 | MIR4435-2HG | RPL34 |
| 21 | TYMP | RPL35A |
| 22 | CD2 | RPS23 |
| 23 | ARPC1B | RPL22 |
| 24 | ENO1 | MAL |
| 25 | HLA-A | RPL38 |
| 26 | FOXP3 | RPL5 |
| 27 | IL2RA | RPS29 |
| 28 | UCP2 | RPS5 |
| 29 | FAM129A | RPS15A |
| 30 | GLRX | RPS27A |
# library(anndata)
# ad <- AnnData(
# X = t(amp@assays$RNA@counts),
# obs = amp@meta.data,
# var = data.frame(row.names = rownames(amp))
# )
# write_h5ad(ad, "../../analysis/AMP2_RA_tissue/AMP.allTcells.h5ad")
saveRDS(amp, "/data/brennerlab/Shani/projects/AMP_Phase_2/AMP2_Tcells_Seurat.rds")
amp <- readRDS("/data/brennerlab/Shani/projects/AMP_Phase_2/AMP2_Tcells_Seurat.rds")
fig.size(8,10)
ampdat <- amp@reductions$umap@cell.embeddings %>% data.frame%>% mutate(cluster = amp$cluster) %>%
mutate(highlight = if_else(grepl("Treg", cluster), cluster, "Other T"))
ggplot(ampdat, aes(x=UMAP_1, y = UMAP_2, color = highlight)) + geom_point(size =0.5) +
scale_color_manual(values = c("grey", "#43A5BE", "#F07857")) + theme_bw(base_size = 20)+ coord_fixed()
fig.size(8,20)
ggplot(ampdat, aes(x=UMAP_1, y = UMAP_2, color = cluster)) + geom_point(size =0.5) + coord_fixed() +
theme_bw(base_size = 20)
# data.frame("umap1" = amp@reductions$umap$UMAP_1, "umap2" = amp@reductions$umap$UMAP_2, "cluster" = amp$cluster)
Error in data.frame(.): object 'amp' not found
Traceback:
1. mutate(., highlight = if_else(grepl("Treg", cluster), cluster,
. "Other T"))
2. mutate(., cluster = amp$cluster)
3. data.frame(.)
4. .handleSimpleError(function (cnd)
. {
. watcher$capture_plot_and_output()
. cnd <- sanitize_call(cnd)
. watcher$push(cnd)
. switch(on_error, continue = invokeRestart("eval_continue"),
. stop = invokeRestart("eval_stop"), error = invokeRestart("eval_error",
. cnd))
. }, "object 'amp' not found", base::quote(data.frame(.)))
fig.size(5,6)
FeaturePlot(amp, features = c("FOXP3"))
exp.dat <- amp@meta.data[, c("cluster_number", "donor")]%>% mutate(Treg = ifelse(cluster_number %in% c("T-8", "T-9"), "Treg", "Other T"))%>%
group_by(donor,Treg)%>% summarize(n = n())%>% ungroup(donor, Treg)
# exp.dat
fig.size(5,10)
p <- ggplot(exp.dat, aes(x=donor, y=n, fill=Treg)) + theme_bw(base_size = 20) + theme(axis.ticks.x=element_blank(),axis.text.x=element_blank())
p + geom_bar(stat="identity")
p <- p + geom_bar(stat="identity", position = "fill")
p
# p + geom_hline(yintercept=0.01)
`summarise()` has grouped output by 'donor'. You can override using the `.groups` argument.
unique(amp$cluster[grepl("CD4\\+", amp$cluster)])
exp.dat <- amp@meta.data[, c("cluster","donor")]%>% filter(grepl("CD4\\+", cluster))%>%
mutate(Treg = ifelse(grepl("Treg", cluster), "Treg", "Other CD4+T"))%>%
group_by(donor,Treg) %>% summarize(n = n())%>% ungroup(donor, Treg)
# exp.dat
fig.size(5,10)
p <- ggplot(exp.dat, aes(x=donor, y=n, fill=Treg)) + theme_bw(base_size = 20) + theme(axis.ticks.x=element_blank(),axis.text.x=element_blank())
p + geom_bar(stat="identity")
p <- p + geom_bar(stat="identity", position = "fill")
p
# p + geom_hline(yintercept=0.01)
`summarise()` has grouped output by 'donor'. You can override using the `.groups` argument.
exp.dat %>% group_by(donor, Treg) %>% summarise(n = sum(n)) %>% ungroup() %>%
group_by(donor) %>% mutate(total = sum(n)) %>% group_by(donor, Treg) %>% mutate(prop = n/total) %>%
filter(Treg == "Treg") %>% ungroup() %>% with(prop) -> Treg.prop
round(min(Treg.prop)*100,1)
round(max(Treg.prop)*100,1)
round(median(Treg.prop)*100, 1)
round(mean(Treg.prop)*100,1)
round(IQR(Treg.prop)*100,1)
round(sd(Treg.prop)*100,1)
hist(Treg.prop)
`summarise()` has grouped output by 'donor'. You can override using the `.groups` argument.
exp.dat <- amp@meta.data[, c("cluster_number", "donor", "sex")]%>%
mutate(Treg = ifelse(cluster_number %in% c("T-8", "T-9"), "Treg", "Other T"))%>%
group_by(donor,Treg, sex)%>% summarize(n = n())%>% ungroup()
p <- ggplot(exp.dat, aes(x=donor, y=n, fill=Treg)) + theme_bw(base_size = 20) + facet_wrap(~sex) + theme(axis.ticks.x=element_blank(),axis.text.x=element_blank())
p + geom_bar(stat="identity")
p <- p + geom_bar(stat="identity", position = "fill")
p
`summarise()` has grouped output by 'donor', 'Treg'. You can override using the `.groups` argument.
exp.dat.f <- amp@meta.data[, c("cluster_number", "donor")]%>% mutate(Treg = ifelse(cluster_number %in% c("T-8", "T-9"), "Treg", "Other T"))%>%
group_by(donor,Treg)%>% summarize(n = n())%>% ungroup(donor, Treg)%>% filter(Treg == "Treg")
p <- ggplot(exp.dat.f, aes(x=reorder(donor, -n), y=n, fill=Treg)) + geom_bar(stat="identity") + theme_bw(base_size = 20)+
theme(axis.ticks.x=element_blank(),axis.text.x=element_blank())
p
min.Tregs <- 20
c <- exp.dat.f%>% filter(n<min.Tregs)%>% count()%>% pull()
remained <- exp.dat.f%>% filter(n>=min.Tregs)%>% summarise(sum(n))%>% pull()
removed <- exp.dat.f %>% summarise(sum(n))%>% pull() - remained
p + geom_hline(yintercept=min.Tregs) +
annotate("text", x = length(exp.dat.f$donor) - c/2, y = min.Tregs+10, label = paste0("Removing ", c, " donors with <", min.Tregs, " Tregs"), size = 6) +
annotate("text", x = 15, y = 255, label = paste0("Remaining ", remained, " Tregs"), size = 6) +
annotate("text", x = 65, y = 255, label = paste0("Removing ", removed, " Tregs"), size = 6)
min.Tregs <- 50
c <- exp.dat.f%>% filter(n<min.Tregs)%>% count()%>% pull()
remained <- exp.dat.f%>% filter(n>=min.Tregs)%>% summarise(sum(n))%>% pull()
removed <- exp.dat.f %>% summarise(sum(n))%>% pull() - remained
p + geom_hline(yintercept=min.Tregs) +
annotate("text", x = length(exp.dat.f$donor) - c/2, y = min.Tregs+10, label = paste0("Removing ", c, " donors with <", min.Tregs, " Tregs"), size = 6) +
annotate("text", x = 15, y = 255, label = paste0("Remaining ", remained, " Tregs"), size = 6) +
annotate("text", x = 65, y = 255, label = paste0("Removing ", removed, " Tregs"), size = 6)
`summarise()` has grouped output by 'donor'. You can override using the `.groups` argument.
amp
An object of class Seurat 33596 features across 94046 samples within 2 assays Active assay: RNA (33538 features, 2000 variable features) 1 other assay present: ADT 2 dimensional reductions calculated: pca, umap
amp$cluster %>% unique
fig.size(15, 15)
VlnPlot(amp, features = c("FOXP3", "IL2RA", "AREG"), group.by = "cluster", ncol = 1)
VlnPlot(amp, features = c("FOXP3", "IL2RA", "AREG"), group.by = "cluster", ncol = 1, pt.size = 0.0000001)
Treg.names <- c("T-8: CD4+ CD25-high Treg", "T-9: CD4+ CD25-low Treg", "T-18: Proliferating")
Idents(amp) <- "cluster"
treg.amp <- subset(amp, idents= Treg.names)
treg.amp
An object of class Seurat 33596 features across 7440 samples within 2 assays Active assay: RNA (33538 features, 2000 variable features) 1 other assay present: ADT 2 dimensional reductions calculated: pca, umap
#move amp UMAP to metadata
treg.amp <-AddMetaData(treg.amp, treg.amp@reductions$umap@cell.embeddings[,1], "UMAP_1.amp")
treg.amp <-AddMetaData(treg.amp, treg.amp@reductions$umap@cell.embeddings[,2], "UMAP_2.amp")
fig.size(6,8)
DimPlot(treg.amp)
#RNA
treg.amp <- treg.amp %>%
NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>%
ScaleData()%>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
RunPCA(verbose = F)
# RunBalancedPCA(weight.by='orig.ident')
#protein
treg.amp <- treg.amp %>%
NormalizeData(normalization.method = "CLR", margin = 2, assay = "ADT") %>%
ScaleData(assay = "ADT")
Centering and scaling data matrix Normalizing across cells Centering and scaling data matrix
fig.size(5,15)
FeaturePlot(treg.amp, features = c("FOXP3", "IL2RA", "AREG", MK), ncol=3, order = T, pt.size = 0.0005)
FeaturePlot(treg.amp, features = c("FOXP3", "IL2RA", "AREG"), ncol=3, order = F, pt.size = 0.0005)
fig.size(5, 12)
DimPlot(treg.amp, reduction = "pca", group.by = "donor")
treg.amp <- RunHarmony(treg.amp, group.by.vars=c("donor"), reduction.use = "pca",
plot_convergence = TRUE, max_iter = 10,
early_stop = T)
fig.size(5,5)
ElbowPlot(treg.amp, ndims = 50, reduction = "harmony")
ElbowPlot(treg.amp, ndims = 50, reduction = "pca")
Transposing data matrix Initializing state using k-means centroids initialization Harmony 1/10 Harmony 2/10 Harmony 3/10 Harmony 4/10 Harmony 5/10 Harmony 6/10 Harmony 7/10 Harmony 8/10 Harmony 9/10 Harmony 10/10 Warning message: “Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
fig.size(5, 12)
DimPlot(treg.amp, reduction = "harmony", group.by = "donor")
# dims_use = 1:20
# treg.amp <- treg.amp %>%
# RunUMAP(reduction="harmony", dims=dims_use, verbose=FALSE) %>%
# FindNeighbors(reduction="harmony", dims=dims_use, verbose=FALSE) %>%
# FindClusters(resolution=0.5, verbose=FALSE)
# Run UMAP
set.seed(1)
HU <- uwot::umap(treg.amp@reductions$harmony@cell.embeddings, min_dist = 0.3, spread = 0.5, ret_extra = 'fgraph', fast_sgd = FALSE)
colnames(HU$embedding) = c('HUMAP1', 'HUMAP2')
rownames(HU$fgraph) = colnames(HU$fgraph) = Cells(treg.amp)
treg.amp[['humap']] <- Seurat::CreateDimReducObject(
embeddings = HU$embedding,
assay = 'RNA',
key = 'HUMAP_',
global = TRUE
)
HU_graph <- Seurat::as.Graph(HU$fgraph)
DefaultAssay(HU_graph) <- DefaultAssay(treg.amp)
treg.amp[['humap_fgraph']] <- HU_graph
treg.amp <- FindClusters(treg.amp, graph.name = 'humap_fgraph',
resolution = 0.8, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 7440 Number of edges: 88413 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.7769 Number of communities: 13 Elapsed time: 0 seconds
# clustercols <- c(colorRampPalette(brewer.pal(8, "Set2"))(13)[1:10], colorRampPalette(brewer.pal(8, "Set1"))(13)[1:9], colorRampPalette(brewer.pal(8, "Set3"))(13)[1:9])
fig.size(5,6)
red.use <- "humap"
DimPlot(treg.amp, reduction = red.use, label = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "sex", label = F, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
fig.size(6, 12)
DimPlot(treg.amp, reduction = red.use, group.by = "donor", label = F, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "cluster", label = F, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
fig.size(10,12)
FeaturePlot(treg.amp, reduction = red.use, c("FOXP3", "IL2RA", "AREG", "CXCR6", "CD4", "MKI67"))
FeaturePlot(treg.amp, reduction = red.use, c("FOXP3", "IL2RA", "AREG", "CXCR6", "CD4", "MKI67"), order = T)
fig.size(5,10)
VlnPlot(treg.amp, features = "FOXP3")
VlnPlot(treg.amp, features = "AREG")
VlnPlot(treg.amp, features = "CD4")
VlnPlot(treg.amp, features = "MKI67")
VlnPlot(treg.amp, features = "IL2RA")
unique(rownames(amp@assays$ADT))
fig.size(5,10)
FeaturePlot(treg.amp, reduction = "humap", features = c("CD45RA-prot", "CD45RO-prot"))
FeaturePlot(treg.amp, reduction = "humap", features = c("CCR7", "SELL"))
FetchData(treg.amp, vars = c("CD45RA-prot", "CD45RO-prot", "cluster"), assay = "ADT", slot = "data") -> dat
colnames(dat) <- c("CD45RA", "CD45RO", "cluster")
fig.size(8,10)
dat %>% ggplot(aes(x=CD45RA, y = CD45RO, col = cluster)) + geom_point() + geom_smooth(method = "lm") + theme_bw(base_size = 20) #+ facet_wrap(~cluster)
Warning message: “Could not find CD45RA-prot in the default search locations, found in ADT assay instead” Warning message: “Could not find CD45RO-prot in the default search locations, found in ADT assay instead”
Warning message: “Could not find CD45RA-prot in the default search locations, found in ADT assay instead” Warning message: “Could not find CD45RO-prot in the default search locations, found in ADT assay instead” `geom_smooth()` using formula = 'y ~ x'
fig.size(5,10)
# RA - naive ; RO - memory
VlnPlot(treg.amp, features = c("CD45RA-prot", "CD45RO-prot"), same.y.lims = T)
Warning message: “Could not find CD45RA-prot in the default search locations, found in ADT assay instead” Warning message: “Could not find CD45RO-prot in the default search locations, found in ADT assay instead” Warning message: “Removed 1 rows containing non-finite values (`stat_ydensity()`).” Warning message: “Removed 1 rows containing missing values (`geom_point()`).”
#number of cells per cluster
p.cells.foxp3 <- FetchData(treg.amp, vars= c("FOXP3", "seurat_clusters"), assay="RNA", slot = "data")
p.cells.foxp3 <- p.cells.foxp3%>% mutate(FOXP3.e = FOXP3 > 0)%>% group_by(seurat_clusters, FOXP3.e)%>% summarise(n = n())%>% ungroup()
ggplot(p.cells.foxp3, aes(x=seurat_clusters, y=n, fill = FOXP3.e)) + geom_bar(stat = "identity")
`summarise()` has grouped output by 'seurat_clusters'. You can override using the `.groups` argument.
VlnPlot(treg.amp, features = "nFeature_RNA")
VlnPlot(treg.amp, features = "nCount_RNA")
VlnPlot(treg.amp, features = "percent_mito")
# treg.markers <- FindAllMarkers(treg.amp, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, verbose = F)
Idents(treg.amp) <- "seurat_clusters"
treg.markers <- wilcoxauc(treg.amp, "seurat_clusters")
treg.markers %>% write.csv("../../analysis/AMP2_RA_tissue/AMP.Treg.all.clustering.markers.csv")
# treg.markers %>%
# group_by(cluster) %>%
# slice_max(n = 2, order_by = avg_log2FC)
m.show <- treg.markers %>%
group_by(group) %>% filter(padj <0.05 & auc > 0.6) %>%
arrange(group, desc(logFC)) %>%
#slice_max(n = 30, order_by = logFC)%>%
select(feature, group) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = group, values_from = feature)
m.show %>% write.csv("../../analysis/AMP2_RA_tissue/AMP.Treg.clustering.markers.csv")
m.show[1:25,]
| row | 0 | 1 | 10 | 11 | 12 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <int> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
| 1 | RPL34 | JUNB | ISG15 | CCL5 | FN1 | CCL5 | TNFRSF18 | STMN1 | KLF2 | ZNF331 | IL32 | CXCL13 | MALAT1 |
| 2 | RPL32 | FOS | MX1 | CD74 | PRG4 | NKG7 | TNFRSF4 | TUBA1B | JUNB | JUNB | S100A4 | HLA-DRA | NEAT1 |
| 3 | RPL13 | JUN | IFIT2 | MS4A1 | CST3 | GZMA | CTLA4 | HMGB2 | DUSP1 | MAGEH1 | LTB | HLA-DRB1 | TTN |
| 4 | RPS18 | KLF2 | XAF1 | NKG7 | PLA2G2A | GZMK | IL32 | TUBB | TSC22D3 | AREG | C15orf53 | CD74 | MIAT |
| 5 | RPL39 | FOSB | IFIT3 | HLA-DRA | CLU | CCL4 | S100A4 | MKI67 | JUN | ICA1 | FOXP3 | GAPDH | PCSK7 |
| 6 | RPS12 | NR4A2 | IFI6 | GZMA | FTL | GZMH | CTSC | CENPF | RGS1 | RGS2 | GBP5 | TNFRSF18 | MTRNR2L12 |
| 7 | RPL30 | BTG2 | IFIT1 | HLA-DPB1 | CRTAC1 | CST7 | BATF | TOP2A | CREM | SESN3 | MALAT1 | TNFRSF4 | NKTR |
| 8 | RPS6 | ZNF331 | PMAIP1 | HLA-DQA1 | CFD | CD8A | LMNA | ASPM | DNAJB1 | FOS | S100A6 | RBPJ | SYNE2 |
| 9 | RPLP2 | CD69 | OASL | CCL4 | HTRA1 | HLA-DRB1 | PHLDA1 | HMGN2 | FTH1 | SRGN | PBXIP1 | MAF | DDX17 |
| 10 | RPL9 | YPEL5 | LY6E | ID2 | COL1A2 | CD74 | TNFRSF1B | GZMA | BTG2 | STAT3 | TNFRSF18 | KLRB1 | XIST |
| 11 | RPL22 | DUSP1 | ISG20 | HLA-DPA1 | LYZ | HLA-DRA | LTB | HMGB1 | KLF6 | MAL | IL2RA | FKBP5 | N4BP2L2 |
| 12 | RPL36 | BTG1 | MX2 | BANK1 | COL3A1 | HLA-DPB1 | TIGIT | CCL5 | JUND | CREM | SYNE2 | LGALS1 | MT-ND3 |
| 13 | RPS14 | AREG | HERC5 | IGKC | TIMP3 | ANXA1 | TYMP | H2AFZ | TNFAIP3 | RNF19A | EMP3 | COTL1 | POLR2J3.1 |
| 14 | RPS3A | CXCR4 | OAS1 | GZMK | ANXA1 | APOBEC3G | LINC01943 | NKG7 | AC058791.1 | TSHZ2 | SAMHD1 | HLA-DPA1 | OGA |
| 15 | RPL35A | ZFP36L2 | STAT1 | HOPX | TIMP1 | GZMM | SPOCK2 | SMC4 | LTB | YPEL5 | CD52 | CD7 | MT-ND2 |
| 16 | RPS13 | EEF1B2 | EPSTI1 | FOS | HLA-DRA | HLA-DPA1 | ENO1 | TYMS | FOSB | BTG1 | TRAC | S100A11 | MT-ND5 |
| 17 | RPL37 | SOCS3 | SAT1 | HLA-DRB1 | MGP | COTL1 | MAF | NUSAP1 | LMNA | GEM | UGP2 | HLA-DPB1 | GOLGA8A |
| 18 | RPL38 | RPS12 | RGS1 | RALGPS2 | DCN | CTSW | DUSP4 | HIST1H4C | CD69 | RPL9 | N4BP2L2 | NR3C1 | GOLGA8B |
| 19 | RPS23 | RPS8 | EIF2AK2 | HLA-DQB1 | LUM | CD8B | GAPDH | DUT | NR4A2 | NMB | HLA-A | SPOCK2 | MT-CYB |
| 20 | RPS29 | RPS6 | TYMP | MEF2C | APOE | HCST | SRGN | DEK | SAT1 | TIGIT | TMSB10 | ITM2A | NABP1 |
| 21 | RPL11 | RPL32 | SMCHD1 | JUN | C1QA | KLRG1 | FOXP3 | PCLAF | YPEL5 | ZFP36L2 | B2M | PKM | PNISR |
| 22 | RPL19 | RPS3A | ZC3HAV1 | PLCG2 | MMP3 | CCL4L2 | PKM | HLA-DRB1 | TENT5C | ITM2A | NA | ARPC1B | MT-ND4 |
| 23 | RPS28 | JUND | TRIM22 | ANXA1 | VIM | IFNG | IL2RA | GAPDH | BTG1 | RPS4X | NA | S100A4 | MT-ATP6 |
| 24 | RPL12 | RPL9 | IFI44L | CTSW | RNASE1 | GZMB | CARD16 | COTL1 | CD52 | JUND | NA | SRGN | MT-ND1 |
| 25 | RPS21 | RPS13 | FOXP3 | IL7R | EMP3 | HLA-DQA1 | LGALS1 | H2AFV | DNAJA1 | SOCS3 | NA | HLA-DQB1 | MT-CO3 |
OLD: 0 - Some Treg population - with TNFRSF18 1 - AREG, low FOXP3 2 - "naive" phenotype (high expression of ribosomal proteins) 3 - MHC class II (CD74, HLA-DPB1, HLA-DRB1), IFNg signaling (FLNA, HLA-DPB1, HLA-DRB1, SAMHD1, GBP5), NFKB cascade (CD74, FLNA, HLA-DRB1, S100A4, LGALS1) 4 - CXCR4, AREG 5 - high mitochondrial genes, lnc/ low quality cells - REMOVE 6 - CD8 GZMK? -REMOVE 7 - No FOXP3, yes Fibroblasts genes - REMOVE 8 - HSP, stimuli response? 9 - IL7R 10 - proliferating 11 - IFN signature
NEW: 2 - CD8 GZMK - REMOVE 8 - low on FOXP3 (originally from proliferating), high on CXCL13, do not have MKI 67 expression - REMOVE 9- mt high, low quality - REMOVE 11 - non CD4 non prolif - REMOVE 12 - fibroblasts - REMOVE
#Psudobulk and correlation
amp.clusters <- FetchData(object = amp,
vars = c(treg.amp@assays$RNA@var.features, "cluster"),
slot = "data", assay = "RNA")
amp.clusters <- amp.clusters %>% group_by(cluster) %>% summarise_all("mean")
new.clusters <- FetchData(object = treg.amp,
vars = c(treg.amp@assays$RNA@var.features, "seurat_clusters"),
slot = "data", assay = "RNA")
new.clusters <- new.clusters %>% group_by(seurat_clusters) %>% summarise_all("mean")
amp.clusters <- data.frame(amp.clusters)
row.names(amp.clusters) <- amp.clusters$cluster
amp.clusters$cluster <- NULL
new.clusters <- data.frame(new.clusters)
row.names(new.clusters) <- new.clusters$seurat_clusters
new.clusters$seurat_clusters <- NULL
Warning message in asMethod(object): “sparse->dense coercion: allocating vector of size 1.4 GiB”
dim(new.clusters)
cluster.cor <- cor(t(scale(amp.clusters)), t(scale(new.clusters)), use = "complete.obs")
cluster.cor <- data.frame(cluster.cor)
# library(pheatmap)
colorBreaks_cor = seq(-0.8,0.8,length=1000)
palette_cor <- colorRampPalette(c("blue", "white", "red"))(n = length(colorBreaks_cor))
pheatmap(cluster.cor, scale = "none",
cluster_cols = F)
pheatmap(cluster.cor, scale = "none", breaks = colorBreaks_cor, color = palette_cor,
cluster_cols = T)
saveRDS(treg.amp, "/data/brennerlab/Shani/projects/Treg/analysis/AMP2_RA_tissue/AMP2_tissue_Treg_prolif_orig.rds")
# treg.amp <- readRDS("/data/brennerlab/Shani/projects/Treg/analysis/AMP2_RA_tissue/AMP2_tissue_Treg_prolif_orig.rds")
Idents(treg.amp) <- "seurat_clusters"
#RNA
treg.amp <- treg.amp %>%
subset(idents = c(2,8,9,11,12), invert= T) %>%
NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>%
ScaleData()%>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
RunPCA(verbose = F)
#protein
treg.amp <- NormalizeData(treg.amp, normalization.method = "CLR", margin = 2, assay = "ADT") %>%
ScaleData(assay = "ADT")
treg.amp
Centering and scaling data matrix Normalizing across cells Centering and scaling data matrix
An object of class Seurat 33596 features across 5863 samples within 2 assays Active assay: RNA (33538 features, 2000 variable features) 1 other assay present: ADT 4 dimensional reductions calculated: pca, umap, harmony, humap
treg.amp <- RunHarmony(treg.amp, group.by.vars=c("donor"), reduction.use = "pca",
plot_convergence = TRUE, max_iter = 10,
early_stop = T)
fig.size(5,5)
ElbowPlot(treg.amp, ndims = 50, reduction = "harmony")
ElbowPlot(treg.amp, ndims = 50, reduction = "pca")
# Run UMAP
set.seed(1)
HU <- uwot::umap(treg.amp@reductions$harmony@cell.embeddings, min_dist = 0.3,
spread = 0.5, ret_extra = 'fgraph', fast_sgd = FALSE)
colnames(HU$embedding) = c('HUMAP1', 'HUMAP2')
rownames(HU$fgraph) = colnames(HU$fgraph) = Cells(treg.amp)
treg.amp[['humap']] <- Seurat::CreateDimReducObject(
embeddings = HU$embedding,
assay = 'RNA',
key = 'HUMAP_',
global = TRUE
)
HU_graph <- Seurat::as.Graph(HU$fgraph)
DefaultAssay(HU_graph) <- DefaultAssay(treg.amp)
treg.amp[['humap_fgraph']] <- HU_graph
treg.amp <- FindClusters(treg.amp, graph.name = 'humap_fgraph',
resolution = 1, verbose = TRUE)
# dims_use = 1:20
# treg.amp <- treg.amp %>%
# FindNeighbors(reduction="harmony", dims=dims_use, verbose=FALSE) %>%
# FindClusters(resolution=0.5, verbose=FALSE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 5863 Number of edges: 70188 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.7157 Number of communities: 14 Elapsed time: 0 seconds
# clustercols <- c(colorRampPalette(brewer.pal(8, "Set2"))(13)[1:10], colorRampPalette(brewer.pal(8, "Set1"))(13)[1:9], colorRampPalette(brewer.pal(8, "Set3"))(13)[1:9])
fig.size(5, 6)
red.use = "humap"
DimPlot(treg.amp, reduction = red.use, label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "sex", label = F, repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + theme(text = element_text(size = 20))
fig.size(6, 12)
DimPlot(treg.amp, reduction = red.use, group.by = "donor", label = F, repel = T, shuffle = T,pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "cluster", label = F, repel = T, shuffle = T,pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
fig.size(8,16)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "IL2RA", "AREG", "MKI67", "CD4", "CD8A"), ncol=3, order = T, pt.size = 0.5)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "IL2RA", "AREG", "MKI67", "CD4", "CD8A"), ncol=3, order = F, pt.size = 0.5)
Idents(treg.amp) <- "seurat_clusters"
treg.amp <- FindSubCluster(treg.amp, cluster = 3, resolution = 0.3, graph.name = "humap_fgraph")
fig.size(8,8)
DimPlot(treg.amp, reduction = red.use, group.by = "sub.cluster", label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) +
theme(text = element_text(size = 20)) +
scale_color_tableau("Tableau 20")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 628 Number of edges: 6528 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.8060 Number of communities: 4 Elapsed time: 0 seconds
Idents(treg.amp) <- "seurat_clusters"
treg.markers <- wilcoxauc(treg.amp, "sub.cluster")
m.show <- treg.markers %>%
group_by(group) %>% filter(padj <0.05 & auc > 0.6) %>%
arrange(group, desc(logFC)) %>%
select(feature, group) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = group, values_from = feature)
m.show[1:30,]
| row | 0 | 1 | 10 | 11 | 12 | 13 | 2 | 3_0 | 3_1 | 3_2 | 3_3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <int> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
| 1 | MALAT1 | JUNB | ZNF331 | ISG15 | TNFRSF18 | TNFRSF4 | TNFRSF18 | STMN1 | CENPF | STMN1 | TUBA1B | IL32 | ZNF331 | KLF2 | TNFRSF4 | LMNA | CCL5 |
| 2 | RPL13 | FOS | JUNB | MX1 | TNFRSF4 | TNFRSF18 | S100A4 | CCL5 | ASPM | TUBA1B | STMN1 | S100A4 | JUNB | JUNB | TNFRSF18 | S100A4 | NKG7 |
| 3 | MT-ND2 | JUN | ANXA1 | XAF1 | TNFRSF1B | PKM | IL32 | TUBA1B | HMGB2 | DUT | TUBB | LTB | FOS | DUSP1 | CTLA4 | KLF2 | GZMA |
| 4 | MT-ND1 | KLF2 | FOS | IFI6 | CTLA4 | ENO1 | TNFRSF4 | HMGB2 | MKI67 | PCNA | HMGB2 | MALAT1 | RGS2 | JUN | SRGN | S100A10 | GZMH |
| 5 | RPL32 | NR4A2 | AREG | IFIT3 | CTSC | HSP90AB1 | LGALS1 | TUBB | CCL5 | GAPDH | TOP2A | CD52 | AREG | TSC22D3 | MAF | ANXA2 | STMN1 |
| 6 | MT-CYB | CD69 | CD69 | LY6E | CD7 | EIF5A | CTSC | NKG7 | STMN1 | CD74 | HIST1H4C | GBP5 | SESN3 | DNAJB1 | DUSP4 | S100A6 | GZMK |
| 7 | RPLP2 | BTG2 | CREM | ISG20 | SPOCK2 | BATF | CTLA4 | GZMA | GZMA | TYMS | MKI67 | TXNIP | BTG1 | TNFAIP3 | CD7 | MYADM | CCL4 |
| 8 | RPL34 | FOSB | FOSB | OASL | BATF | PGAM1 | LMNA | HIST1H4C | TOP2A | TUBB | H2AFZ | SYNE2 | ZFP36L2 | BTG2 | ITM2A | EMP3 | CD8A |
| 9 | MT-ND3 | ZNF331 | NR4A2 | IFIT2 | IL32 | MIR155HG | BATF | MKI67 | TUBA1B | HLA-DRA | HMGB1 | EVL | CXCR4 | RGS1 | TIGIT | CD52 | DUT |
| 10 | MT-ND4 | YPEL5 | TNFAIP3 | MX2 | PTPN7 | PARK7 | LGALS3 | TOP2A | NKG7 | HMGB2 | CENPF | N4BP2L2 | ITM2A | FTH1 | SPOCK2 | LGALS3 | HLA-DRB1 |
| 11 | RPL39 | ZFP36L2 | YPEL5 | IFIT1 | LAIR2 | CALR | TYMP | ASPM | PTTG1 | COTL1 | TYMS | NKTR | YPEL5 | KLF6 | KLRB1 | LGALS1 | GZMB |
| 12 | RPS18 | EEF1B2 | DUSP1 | STAT1 | RNF213 | PSME2 | GLRX | CENPF | TUBB | PCLAF | DUT | PNISR | SRGN | FOSB | RNF19A | VIM | TUBA1B |
| 13 | RPS6 | AREG | DUSP2 | OAS1 | ENTPD1 | TNFRSF9 | LINC01943 | NUSAP1 | SMC4 | PKM | HMGN2 | S100A6 | MAL | CREM | CD2 | ITGB1 | COTL1 |
| 14 | RPL38 | DUSP1 | ZFP36L2 | EPSTI1 | NEAT1 | GAPDH | IL2RA | HMGB1 | CENPE | H2AFZ | GAPDH | ACAP1 | JUND | JUND | UCP2 | S100A11 | PCNA |
| 15 | RPS14 | RPS12 | SRGN | EIF2AK2 | LTB | SRM | HLA-DRB1 | HMGN2 | UBE2C | HLA-DRB1 | MT2A | CYBA | MAGEH1 | MYADM | ARID5B | TSC22D3 | HLA-DRA |
| 16 | RPL9 | CXCR4 | SLC2A3 | HERC5 | MT-ND5 | CTLA4 | CD74 | CCL4 | ARL6IP1 | HMGB1 | HLA-DRB1 | DDX17 | STAT3 | AC058791.1 | NMB | IL32 | CD74 |
| 17 | MT-CO3 | DNAJA1 | DNAJA1 | RGS1 | IL2RA | DNPH1 | S100A6 | SMC4 | HMGN2 | TPI1 | NUSAP1 | PTPRC | RPL9 | FOS | CREM | TAGLN2 | APOBEC3G |
| 18 | RPL37 | SOCS3 | JUN | TYMP | MAF | LDHA | PHLDA1 | GZMK | TPX2 | DEK | HLA-DRA | HLA-A | RPS27 | NR4A2 | LAG3 | CRIP2 | PCLAF |
| 19 | RPS12 | BTG1 | CXCR4 | PMAIP1 | TYMP | RANBP1 | GAPDH | HIST1H1C | UBE2S | ENO1 | PCLAF | TMSB10 | RNF19A | LMNA | ICA1 | CRIP1 | TYMS |
| 20 | RPL35A | RPS3A | AC020916.1 | IFI44L | LAYN | NME1 | ENO1 | HIST1H1D | NUSAP1 | LGALS1 | ASPM | B2M | RPS13 | TENT5C | CD27 | ATP2B1 | HMGB2 |
| 21 | RPL30 | RPS6 | SOCS1 | SP100 | PKM | SEC61B | TIGIT | TYMS | CCNB1 | PFN1 | ENO1 | NA | RPL30 | HSP90AA1 | LAIR2 | SAMHD1 | HLA-DPA1 |
| 22 | RPL36 | RPS13 | LMNA | SAMD9L | LGALS1 | PRDX1 | TNFRSF1B | H2AFZ | DLGAP5 | SLC25A5 | CD74 | NA | RPS3A | CD69 | MAGEH1 | ISG20 | DEK |
| 23 | RPL12 | RPS8 | IL7R | TRIM22 | CD2 | NDUFS6 | CARD16 | TMPO | KIF20B | RANBP1 | UBE2C | NA | TSHZ2 | DNAJA1 | CD4 | ARL4C | CST7 |
| 24 | RPS20 | PABPC1 | RGCC | SAT1 | CD74 | EIF5B | FOXP3 | HIST1H1B | HMGB1 | ACTB | SLC25A5 | NA | ICA1 | IER2 | C9orf16 | SH3BGRL3 | CTSW |
| 25 | MT-ATP6 | RPL9 | NR3C1 | ZC3HAV1 | LAG3 | NINJ1 | MYL6 | UBE2C | JPT1 | MCM7 | PTTG1 | NA | RPS4X | ZFP36 | LIMS1 | DUSP4 | ANXA1 |
| 26 | RPS23 | RPS5 | ALOX5AP | SMCHD1 | IL2RG | PPA1 | CYTOR | H2AFV | KPNA2 | HLA-DPA1 | SMC4 | NA | RPL21 | IRF1 | IL32 | CLIC1 | HELLS |
| 27 | RPS3A | RPL32 | GPR183 | RNF213 | IL2RB | SET | CLIC1 | DEK | TUBA1C | HMGN2 | LGALS1 | NA | RPS14 | YPEL5 | DUSP2 | EZR | MT2A |
| 28 | RPL27A | RPL3 | BTG2 | SP110 | CD4 | PSMA7 | DUSP4 | KNL1 | LGALS1 | PPIA | TPI1 | NA | EEF1A1 | BTG1 | PHLDA1 | TXN | LCP1 |
| 29 | RPL11 | RPL30 | JUND | PARP14 | CD27 | TNFRSF1B | S100A10 | CD8A | BIRC5 | TK1 | RAN | NA | RPS27A | PPP1R15A | NDUFV2 | CORO1A | GSTP1 |
| 30 | MT-CO2 | NFKBIA | BTG1 | IRF1 | TNFRSF9 | REL | CXCR6 | H2AFX | H2AFZ | RAN | RRM2 | NA | RPL3 | CD55 | PKM | PBXIP1 | IDH2 |
fig.size(5, 20)
VlnPlot(treg.amp, features = c("FOXP3", "CD4", "CD8A", "MKI67"), group.by = "sub.cluster")
#RNA
Idents(treg.amp) <- "sub.cluster"
treg.amp <- treg.amp %>%
subset(idents = c("3_0", "3_1", "9"), invert= T) %>%
NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>%
ScaleData()%>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
RunPCA(verbose = F)
#protein
treg.amp <- NormalizeData(treg.amp, normalization.method = "CLR", margin = 2, assay = "ADT") %>%
ScaleData(assay = "ADT")
treg.amp
Centering and scaling data matrix Normalizing across cells Centering and scaling data matrix
An object of class Seurat 33596 features across 5368 samples within 2 assays Active assay: RNA (33538 features, 2000 variable features) 1 other assay present: ADT 4 dimensional reductions calculated: pca, umap, harmony, humap
fig.size(4,4)
treg.amp <- RunHarmony(treg.amp, group.by.vars=c("donor"), reduction.use = "pca",
plot_convergence = TRUE, max_iter = 10,
early_stop = T)
Transposing data matrix Initializing state using k-means centroids initialization Harmony 1/10 Harmony 2/10 Harmony 3/10 Harmony 4/10 Harmony 5/10 Harmony 6/10 Harmony 7/10 Harmony 8/10 Harmony 9/10 Harmony 10/10 Warning message: “Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
# Run UMAP
set.seed(1)
HU <- uwot::umap(treg.amp@reductions$harmony@cell.embeddings, min_dist = 0.3,
spread = 0.5, ret_extra = 'fgraph', fast_sgd = FALSE)
colnames(HU$embedding) = c('HUMAP1', 'HUMAP2')
rownames(HU$fgraph) = colnames(HU$fgraph) = Cells(treg.amp)
treg.amp[['humap']] <- Seurat::CreateDimReducObject(
embeddings = HU$embedding,
assay = 'RNA',
key = 'HUMAP_',
global = TRUE
)
HU_graph <- Seurat::as.Graph(HU$fgraph)
DefaultAssay(HU_graph) <- DefaultAssay(treg.amp)
treg.amp[['humap_fgraph']] <- HU_graph
treg.amp <- FindClusters(treg.amp, graph.name = 'humap_fgraph',
resolution = 0.5, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 5368 Number of edges: 64887 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.7972 Number of communities: 12 Elapsed time: 0 seconds
# clustercols <- c(colorRampPalette(brewer.pal(8, "Set2"))(13)[1:10], colorRampPalette(brewer.pal(8, "Set1"))(13)[1:9], colorRampPalette(brewer.pal(8, "Set3"))(13)[1:9])
fig.size(5, 6)
red.use = "humap"
DimPlot(treg.amp, reduction = red.use, label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "sex", label = F, repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + theme(text = element_text(size = 20))
fig.size(6, 12)
DimPlot(treg.amp, reduction = red.use, group.by = "donor", label = F, repel = T, shuffle = T,pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "cluster", label = F, repel = T, shuffle = T,pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
fig.size(8,16)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "IL2RA", "AREG", "MKI67", "CD4", "CD8A"),
ncol=3, order = F)
fig.size(5, 10)
FeaturePlot(treg.amp, reduction = "humap", features = c("IL7R", "CD127/IL-7R-alpha-prot"), order = F)
VlnPlot(treg.amp, features = c("IL7R", "CD127/IL-7R-alpha-prot"))
VlnPlot(treg.amp, features = c("FOXP3", "IL2RA"))
Warning message: “Could not find CD127/IL-7R-alpha-prot in the default search locations, found in ADT assay instead” Warning message: “Could not find CD127/IL-7R-alpha-prot in the default search locations, found in ADT assay instead”
table(treg.amp$cluster)
T-18: Proliferating T-8: CD4+ CD25-high Treg T-9: CD4+ CD25-low Treg
344 2510 2514
treg.amp
table(treg.amp$seurat_clusters)
An object of class Seurat 33596 features across 5368 samples within 2 assays Active assay: RNA (33538 features, 2000 variable features) 1 other assay present: ADT 4 dimensional reductions calculated: pca, umap, harmony, humap
0 1 2 3 4 5 6 7 8 9 10 11 1631 1054 858 467 424 264 253 139 130 54 48 46
5368 - 264
DotPlot(treg.amp, features = c("FOXP3", "IL2RA", "IL7R", "MKI67"), scale = F)
#Psudobulk
pb <- FetchData(object = treg.amp,
vars = c("FOXP3", "IL2RA", "AREG", "IL7R", "CD127/IL-7R-alpha-prot", "seurat_clusters"),
slot = "data", assay = "RNA")
pb <- pb %>% group_by(seurat_clusters) %>% summarise_all("mean") %>% rename("CD127.prot" = "adt_CD127/IL-7R-alpha-prot")
# pb%>% head
Warning message: “Could not find CD127/IL-7R-alpha-prot in the default search locations, found in ADT assay instead”
pb%>% ggplot(aes(x = FOXP3, y = CD127.prot, label = seurat_clusters, color = AREG)) + geom_point() + geom_smooth(method = "lm", se = FALSE) + geom_label()
`geom_smooth()` using formula = 'y ~ x' Warning message: “The following aesthetics were dropped during statistical transformation: label, colour ℹ This can happen when ggplot fails to infer the correct grouping structure in the data. ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable into a factor?”
Idents(treg.amp) <- "seurat_clusters"
treg.markers <- wilcoxauc(treg.amp, "seurat_clusters")
m.show <- treg.markers %>%
group_by(group) %>% filter(padj <0.05 & auc > 0.6) %>%
arrange(group, desc(logFC)) %>%
select(feature, group) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = group, values_from = feature)
m.show[1:30,]
| row | 0 | 1 | 10 | 11 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <int> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
| 1 | RPL13 | TNFRSF18 | AC004585.1 | LMNA | JUNB | JUN | IL32 | TUBA1B | ITM2A | TNFRSF18 | CCL5 | ISG15 |
| 2 | RPLP2 | LGALS1 | TSHZ2 | S100A10 | ZNF331 | KLF2 | GBP5 | STMN1 | DUSP4 | IL32 | GZMK | MX1 |
| 3 | RPL32 | IL32 | ITM2A | VIM | FOS | DUSP1 | SYNE2 | TUBB | SRGN | NEAT1 | ANXA1 | XAF1 |
| 4 | RPS12 | S100A4 | TCF7 | MYADM | NR4A2 | BTG2 | SLFN5 | HMGB2 | TNFRSF4 | SPOCK2 | GZMA | IFI6 |
| 5 | RPS18 | CTSC | RBPJ | CRIP2 | YPEL5 | JUNB | NKTR | DUT | KLRB1 | MAF | GIMAP7 | LY6E |
| 6 | RPS6 | TNFRSF4 | PGM2L1 | CXCR4 | AREG | DNAJB1 | FLNA | HMGB1 | NMB | TNFRSF4 | CST7 | ISG20 |
| 7 | RPL34 | HLA-DRB1 | CXCL13 | TAGLN2 | JUN | RGS1 | TTN | H2AFZ | RNF19A | TNFRSF1B | NKG7 | EIF2AK2 |
| 8 | RPS20 | CD74 | FKBP5 | CRIP1 | FOSB | FOS | EVL | GAPDH | CTLA4 | CYTOR | GZMM | EPSTI1 |
| 9 | RPL39 | GAPDH | MAF | KLF6 | CD69 | CD69 | PCSK7 | TYMS | CD7 | CD7 | GIMAP4 | STAT1 |
| 10 | RPS14 | ENO1 | RNF19A | CD69 | DNAJA1 | FOSB | MTRNR2L12 | MKI67 | ICA1 | BATF | COTL1 | MX2 |
| 11 | RPL30 | CTLA4 | PVALB | AHNAK | ZFP36L2 | TSC22D3 | MALAT1 | CD74 | MAF | CTLA4 | ALOX5AP | OAS1 |
| 12 | RPS3A | LGALS3 | NMB | S100A11 | DUSP2 | JUND | SAMHD1 | HLA-DRA | NR3C1 | DUSP4 | HCST | HERC5 |
| 13 | RPL9 | MYL6 | STAT3 | TSC22D3 | BTG2 | KLF6 | CD74 | HLA-DRB1 | C9orf16 | FOXP3 | CD99 | IFIT1 |
| 14 | RPL38 | LMNA | ID2 | ATP2B1 | DUSP1 | HSP90AA1 | S100A4 | PCNA | TNFRSF18 | LINC01943 | APOBEC3G | SP100 |
| 15 | RPL35A | CLIC1 | NR3C1 | EZR | SOCS3 | CREM | N4BP2L2 | HMGN2 | UCP2 | CD2 | KLRG1 | TYMP |
| 16 | RPS13 | CYTOR | FYB1 | RGCC | CXCR4 | NR4A2 | RNF213 | PCLAF | MAGEH1 | CD74 | AHNAK | IFIT3 |
| 17 | RPL3 | S100A6 | SESN3 | LRRFIP1 | UBE2S | DNAJA1 | HLA-DPB1 | COTL1 | TIGIT | RNF213 | MYL12A | SAMD9L |
| 18 | RPS8 | TYMP | IL6ST | ANXA2 | EEF1B2 | MYADM | PNISR | ENO1 | CD2 | MIR4435-2HG | LYAR | TRIM22 |
| 19 | RPL37 | S100A10 | PPP2R5C | IDS | BTG1 | GADD45B | IL10RA | LGALS1 | DUSP2 | CTSC | CD8A | IFI44L |
| 20 | RPS23 | TNFRSF1B | DRAIC | EMP3 | PABPC1 | TNFAIP3 | DDX17 | TPI1 | TSHZ2 | TIGIT | CD81 | RNF213 |
| 21 | MT-ND1 | ARPC1B | ZFP36L1 | CD44 | CYCS | AC058791.1 | CD52 | MT2A | CD27 | PHLDA1 | VIM | OASL |
| 22 | RPL36 | BATF | TRIM8 | JUND | JUND | IER2 | MPHOSPH8 | SLC25A5 | NDUFV2 | C4orf48 | ANK3 | BST2 |
| 23 | EEF1B2 | GLRX | CD84 | RPS5 | RPS12 | IRF1 | HLA-A | PKM | OAZ1 | ENTPD1 | ITGB2 | SAMD9 |
| 24 | RPL27A | PKM | PPP1CC | S100A6 | RPL21 | FTH1 | ARGLU1 | TOP2A | PEBP1 | LAYN | CAPZB | IFI16 |
| 25 | RPS25 | ACTB | SLC9A9 | PGM2L1 | RGCC | LMNA | CYBA | DEK | GAPDH | GBP5 | GPR171 | IRF7 |
| 26 | MALAT1 | LINC01943 | CD28 | RPL10A | RPS3A | YPEL5 | S100A6 | RANBP1 | ALOX5AP | MT-ND5 | IFITM2 | SAT1 |
| 27 | RPL12 | UCP2 | ARMH1 | PCSK1N | SLC2A3 | SAT1 | PTPRC | CENPF | ID2 | IL2RB | ANXA6 | SMCHD1 |
| 28 | RPL11 | DUSP4 | CEMIP2 | NUDC | RPS13 | ZFP36 | TMSB10 | HLA-DPA1 | HMGN1 | SAT1 | STK17A | SPATS2L |
| 29 | RPL22 | IL2RA | ST8SIA1 | DUSP10 | RPS5 | HSPA8 | NA | RAN | RBPJ | CD4 | CIB1 | FOXP3 |
| 30 | MT-ND2 | HLA-A | HIF1A | SSBP4 | CHMP1B | SELENOK | NA | ACTB | COTL1 | TYMP | NR3C1 | SP110 |
fig.size(5,7)
sum.donor <- treg.amp@meta.data %>% filter(treatment != "OA") %>% select(donor, CTAP, treatment, seurat_clusters)
pt <- prop.table(table(sum.donor$CTAP, sum.donor$seurat_clusters), 2)
# colSums(pt)
pheatmap(pt, scale = "none", main = "prop of cluster")
pt %>% data.frame() %>% setNames(c("CTAP", "Cluster", "Freq")) %>%
ggplot(aes(x = CTAP, y = Freq, fill = Cluster)) + theme_bw(base_size = 20)-> pl
pl + geom_col(position = "stack") + scale_fill_tableau("Tableau 20")
pl + geom_col(position = "fill") + scale_fill_tableau("Tableau 20")
pt <- prop.table(table(sum.donor$CTAP, sum.donor$seurat_clusters), 1)
# pt
pheatmap(pt, scale = "none", main = "prop of CTAP")
pt %>% data.frame() %>% setNames(c("CTAP", "Cluster", "Freq")) %>%
ggplot(aes(x = Cluster, y = Freq, fill = CTAP)) + theme_bw(base_size = 20)-> pl
pl + geom_col(position = "stack") + scale_fill_tableau("Tableau 20")
pl + geom_col(position = "fill") + scale_fill_tableau("Tableau 20")
saveRDS(treg.amp, "/data/brennerlab/Shani/projects/Treg/analysis/AMP2_RA_tissue/AMP2_tissue_Treg_filtered_QC.rds")
# treg.amp <- readRDS("/data/brennerlab/Shani/projects/Treg/analysis/AMP2_RA_tissue/AMP2_tissue_Treg_filtered_QC.rds")
fig.size(10, 10)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "IL2RA", "AREG", "CXCR6"), ncol=2, order = F, pt.size = 0.0001)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "IL2RA", "AREG", "CXCR6"), ncol=2, order = T, pt.size = 0.0001)
# removing clusters 8,2,10 ( high CD127 low FOXP3)
#RNA
# Idents(treg.amp) <- "sub.cluster"
treg.amp <- treg.amp %>%
subset(idents = c("2", "8", "10"), invert= T) %>%
NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>%
ScaleData()%>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
RunPCA(verbose = F)
#protein
treg.amp <- treg.amp %>%
NormalizeData(normalization.method = "CLR", margin = 2, assay = "ADT") %>%
ScaleData(assay = "ADT")
treg.amp
Centering and scaling data matrix Normalizing across cells Centering and scaling data matrix
An object of class Seurat 33596 features across 4332 samples within 2 assays Active assay: RNA (33538 features, 2000 variable features) 1 other assay present: ADT 4 dimensional reductions calculated: pca, umap, harmony, humap
4332- 264
treg.amp <- RunHarmony(treg.amp, group.by.vars=c("donor"), reduction.use = "pca",
plot_convergence = TRUE, max_iter = 10,
early_stop = T)
fig.size(5,5)
ElbowPlot(treg.amp, ndims = 50, reduction = "harmony")
Transposing data matrix Initializing state using k-means centroids initialization Harmony 1/10 Harmony 2/10 Harmony 3/10 Harmony 4/10 Harmony 5/10 Harmony 6/10 Harmony 7/10 Harmony 8/10 Harmony 9/10 Harmony 10/10 Warning message: “Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
# Run UMAP
set.seed(1)
HU <- uwot::umap(treg.amp@reductions$harmony@cell.embeddings, min_dist = 0.3,
spread = 0.5, ret_extra = 'fgraph', fast_sgd = FALSE)
colnames(HU$embedding) = c('HUMAP1', 'HUMAP2')
rownames(HU$fgraph) = colnames(HU$fgraph) = Cells(treg.amp)
treg.amp[['humap']] <- Seurat::CreateDimReducObject(
embeddings = HU$embedding,
assay = 'RNA',
key = 'HUMAP_',
global = TRUE
)
HU_graph <- Seurat::as.Graph(HU$fgraph)
DefaultAssay(HU_graph) <- DefaultAssay(treg.amp)
treg.amp[['humap_fgraph']] <- HU_graph
treg.amp <- FindClusters(treg.amp, graph.name = 'humap_fgraph',
resolution = 0.5, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 4332 Number of edges: 51609 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.8115 Number of communities: 10 Elapsed time: 0 seconds
# clustercols <- c(colorRampPalette(brewer.pal(8, "Set2"))(13)[1:10], colorRampPalette(brewer.pal(8, "Set1"))(13)[1:9], colorRampPalette(brewer.pal(8, "Set3"))(13)[1:9])
fig.size(4, 5)
DimPlot(treg.amp, reduction = red.use, label = T, repel = T, shuffle = T, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "sex", label = F, repel = T, shuffle = T, pt.size = 0.1, label.size = 6) + theme(text = element_text(size = 20))
fig.size(6, 12)
DimPlot(treg.amp, reduction = red.use, group.by = "donor", label = F, repel = T, shuffle = T,pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
DimPlot(treg.amp, reduction = red.use, group.by = "cluster", label = F, repel = T, shuffle = T,pt.size = 0.5, label.size = 6) + theme(text = element_text(size = 20))
fig.size(4,15)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "IL2RA", "AREG", "NR3C1"), ncol=4, order = T, pt.size =0.0001)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "IL2RA", "AREG", "NR3C1"), ncol=4, order = F, pt.size = 0.0001)
VlnPlot(treg.amp, features = c("FOXP3", "AREG"))
#number of cells per cluster
p.cells.foxp3 <- FetchData(treg.amp, vars= c("FOXP3", "seurat_clusters"), assay="RNA", slot = "data")
p.cells.foxp3 <- p.cells.foxp3%>% mutate(FOXP3.e = FOXP3 > 0)%>% group_by(seurat_clusters, FOXP3.e)%>% summarise(n = n())%>% ungroup()
ggplot(p.cells.foxp3, aes(x=seurat_clusters, y=n, fill = FOXP3.e)) + geom_bar(stat = "identity")
`summarise()` has grouped output by 'seurat_clusters'. You can override using the `.groups` argument.
Idents(treg.amp) <- "seurat_clusters"
treg.markers <- wilcoxauc(treg.amp, "seurat_clusters")
m.show <- treg.markers %>%
group_by(group) %>% filter(padj <0.05 & auc > 0.6) %>%
arrange(group, desc(logFC)) %>%
select(feature, group) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = group, values_from = feature)
m.show[1:30,]
| row | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|---|
| <int> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
| 1 | RPL13 | TNFRSF18 | JUN | IL32 | TUBA1B | TNFRSF18 | NMB | LMNA | ISG15 | GZMK |
| 2 | RPL32 | TNFRSF4 | KLF2 | SYNE2 | STMN1 | CD7 | ICA1 | S100A10 | MX1 | GZMA |
| 3 | RPLP2 | IL32 | DUSP1 | RNF213 | TUBB | MAF | ITM2A | CRIP2 | XAF1 | CCL5 |
| 4 | RPS12 | CTSC | JUNB | N4BP2L2 | HMGB2 | TNFRSF4 | ZNF331 | VIM | IFI6 | CST7 |
| 5 | KLF2 | S100A4 | FOS | GBP5 | DUT | CD2 | NR3C1 | ANXA1 | LY6E | GIMAP7 |
| 6 | RPS18 | LGALS1 | BTG2 | MALAT1 | H2AFZ | CTLA4 | SRGN | TAGLN2 | IFIT3 | NUCB2 |
| 7 | RPS6 | CTLA4 | FOSB | SLFN5 | HMGB1 | SPOCK2 | C9orf16 | CRIP1 | OASL | CD81 |
| 8 | RPS20 | GAPDH | DNAJB1 | NKTR | TYMS | NEAT1 | NDUFV2 | MYADM | MX2 | ANXA1 |
| 9 | RPS3A | ENO1 | RGS1 | TXNIP | GAPDH | TNFRSF1B | RNF19A | AHNAK | ISG20 | LYAR |
| 10 | RPL34 | MYL6 | CD69 | EVL | MKI67 | DUSP4 | GPX4 | S100A11 | STAT1 | TUT4 |
| 11 | FOS | SRGN | NR4A2 | MTRNR2L12 | HLA-DRA | IL32 | TIGIT | ATP2B1 | IFIT1 | RPL23A |
| 12 | ZFP36L2 | LMNA | KLF6 | SAMHD1 | PCNA | CYTOR | DUSP4 | LRRFIP1 | OAS1 | ATM |
| 13 | RPL9 | TYMP | TSC22D3 | PNISR | HMGN2 | SRGN | TNFRSF4 | ANXA2 | EPSTI1 | RPS12 |
| 14 | RPL30 | CLIC1 | CREM | MPHOSPH8 | CD74 | SYNE2 | RGS2 | EMP3 | SP100 | HCST |
| 15 | RPS14 | BATF | JUND | DDX17 | PCLAF | FOXP3 | CD7 | RGCC | HERC5 | UTRN |
| 16 | RPL39 | HLA-DRB1 | DNAJA1 | ARGLU1 | COTL1 | PBXIP1 | PEBP1 | KLF6 | TYMP | CTSW |
| 17 | RPL38 | CD74 | GADD45B | CD52 | HLA-DRB1 | RNF213 | MAGEH1 | PBXIP1 | EIF2AK2 | CCR7 |
| 18 | RPS8 | TNFRSF1B | MYADM | PTPRC | SLC25A5 | EML4 | HINT1 | EZR | ZC3HAV1 | NKG7 |
| 19 | RPS13 | DUSP4 | HSP90AA1 | NA | TPI1 | PHLDA1 | SESN3 | IDS | SMCHD1 | RPS27 |
| 20 | EEF1B2 | ARPC1B | IER2 | NA | ENO1 | XIST | AC004585.1 | S100A6 | SAMD9L | RPL21 |
| 21 | TCF7 | PHLDA1 | FTH1 | NA | TOP2A | PRDM1 | CD27 | TSC22D3 | IFI44L | GIMAP1 |
| 22 | RPL3 | CD2 | AC058791.1 | NA | DEK | IL2RB | FABP5 | CD44 | TRIM22 | RPS3 |
| 23 | RPL35A | PKM | TNFAIP3 | NA | MT2A | N4BP2L2 | RPL15 | CXCR4 | SAT1 | AC004585.1 |
| 24 | RPS23 | CYTOR | HSPA8 | NA | PKM | THADA | HMGN1 | CD99 | BST2 | TCF7 |
| 25 | RPL37 | LINC01943 | LMNA | NA | RANBP1 | CD4 | RNASET2 | YWHAQ | IFIT2 | RPS14 |
| 26 | RPL27A | TIGIT | PPP1R15A | NA | CENPF | MIR4435-2HG | SLC25A3 | PLP2 | IRF7 | ANK3 |
| 27 | RPL11 | ACTB | YPEL5 | NA | RAN | CST7 | SEC11C | SRGN | SAMD9 | RPS27A |
| 28 | RPS25 | LGALS3 | IRF1 | NA | HIST1H4C | RBPJ | LBH | UCP2 | IFI16 | RPL30 |
| 29 | RPL36 | MAF | SAT1 | NA | LGALS1 | C4orf48 | SEC11A | RABAC1 | SPATS2L | RPL13A |
| 30 | CCR7 | HLA-A | ZFP36 | NA | LDHA | TYMP | OAZ1 | CLIC1 | RNF213 | RPL35A |
fig.size(4,15)
FeaturePlot(treg.amp, reduction = "humap", features = c("FOXP3", "GZMK", "CD4", "CD8A"), ncol=4, order = F, pt.size = 0.0001)
fig.size(5,7)
sum.donor <- treg.amp@meta.data %>% filter(treatment != "OA") %>% select(donor, CTAP, treatment, seurat_clusters) %>% distinct()
pt <- prop.table(table(sum.donor$CTAP, sum.donor$seurat_clusters), 2)
# colSums(pt)
pheatmap(pt, scale = "none", main = "prop of cluster")
pt %>% data.frame() %>% setNames(c("CTAP", "Cluster", "Freq")) %>%
ggplot(aes(x = CTAP, y = Freq, fill = Cluster)) + theme_bw(base_size = 20)-> pl
pl + geom_col(position = "stack") + scale_fill_tableau("Tableau 20")
pl + geom_col(position = "fill") + scale_fill_tableau("Tableau 20")
pt <- prop.table(table(sum.donor$CTAP, sum.donor$seurat_clusters), 1)
# pt
pheatmap(pt, scale = "none", main = "prop of CTAP")
pt %>% data.frame() %>% setNames(c("CTAP", "Cluster", "Freq")) %>%
ggplot(aes(x = Cluster, y = Freq, fill = CTAP)) + theme_bw(base_size = 20)-> pl
pl + geom_col(position = "stack") + scale_fill_tableau("Tableau 20")
pl + geom_col(position = "fill") + scale_fill_tableau("Tableau 20")
table(treg.amp$cluster_name)
T-18: Proliferating T-8: CD4+ CD25-high Treg T-9: CD4+ CD25-low Treg
302 2262 1768
saveRDS(treg.amp, "/data/brennerlab/Shani/projects/Treg/analysis/AMP2_RA_tissue/AMP2_tissue_Treg_prolif_filtered_refined.rds")
# treg.amp <- readRDS("/data/brennerlab/Shani/projects/Treg/analysis/AMP2_RA_tissue/AMP2_tissue_Treg_prolif_filtered_refined.rds")
#Psudobulk and correlation
amp.clusters <- FetchData(object = amp,
vars = c(amp@assays$RNA@var.features, "cluster"),
slot = "data", assay = "RNA")
amp.clusters <- amp.clusters %>% group_by(cluster) %>% summarise_all("mean")
amp.clusters <- data.frame(amp.clusters)
row.names(amp.clusters) <- amp.clusters$cluster
amp.clusters$cluster <- NULL
cluster.cor.amp <- cor(t(scale(amp.clusters)), t(scale(amp.clusters)))
Warning message in asMethod(object): “sparse->dense coercion: allocating vector of size 1.4 GiB”
cluster.cor.amp <- data.frame(cluster.cor.amp)
cluster.cor.amp[c("T-8: CD4+ CD25-high Treg", "T-9: CD4+ CD25-low Treg"),1:5]
| T.0..CD4..IL7R..memory | T.10..CD4..OX40.NR3C1. | T.11..CD4..CD146..memory | T.12..CD4..GNLY. | T.13..CD8..GZMK.B..memory | |
|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
| T-8: CD4+ CD25-high Treg | -0.09481217 | 0.2052484 | 0.10680617 | -0.2199580 | -0.08729903 |
| T-9: CD4+ CD25-low Treg | 0.14142870 | 0.3901259 | 0.01403401 | -0.2340225 | -0.35842914 |
fig.size(8,8)
# library(pheatmap)
colorBreaks_cor = seq(-1,1,length=1000)
palette_cor <- colorRampPalette(c("#D4AC0D", "white", "#800080"))(n = length(colorBreaks_cor))
pheatmap(cluster.cor.amp, scale = "none",
cluster_cols = T)
pheatmap(cluster.cor.amp, scale = "none", breaks = colorBreaks_cor, color = palette_cor,
cluster_cols = T)
fig.size(5, 20)
VlnPlot(amp, features = c("FOXP3","IL2RA"), ncol=2, group.by = "cluster")
VlnPlot(amp, features = c("CD4-prot","CD8a-prot"), ncol=2, pt.size = 0, group.by = "cluster")
VlnPlot(amp, features = c("CD127/IL-7R-alpha-prot"), ncol=2, pt.size = 0, group.by = "cluster")
Warning message: “Could not find CD4-prot in the default search locations, found in ADT assay instead” Warning message: “Could not find CD8a-prot in the default search locations, found in ADT assay instead”
Warning message: “Could not find CD127/IL-7R-alpha-prot in the default search locations, found in ADT assay instead”
unique(amp$cluster[grepl("CD4\\+", amp$cluster)])
Idents(amp) <- "cluster"
alt.Treg <- subset(amp, idents = unique(amp$cluster[grepl("CD4\\+", amp$cluster)]))
alt.Treg
An object of class Seurat 33596 features across 58105 samples within 2 assays Active assay: RNA (33538 features, 2000 variable features) 1 other assay present: ADT 2 dimensional reductions calculated: pca, umap
alt.Treg <- alt.Treg %>%
NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
ScaleData() %>%
RunPCA(verbose = F)
Centering and scaling data matrix
alt.Treg <- NormalizeData(alt.Treg, normalization.method = "CLR", margin = 2, assay = "ADT") %>%
ScaleData(assay = "ADT")
Normalizing across cells Centering and scaling data matrix
fig.size(5,10)
DimPlot(alt.Treg, reduction = "pca")
DimPlot(alt.Treg, reduction = "pca", group.by = "donor",shuffle = T)
alt.Treg <- RunHarmony(alt.Treg, group.by.vars="donor", reduction = "pca", theta = 0, plot_convergence = 2,
## prevent early stopping
epsilon.harmony = -Inf, epsilon.cluster = -Inf,
max.iter.harmony = 10, max.iter.cluster = 50,
kmeans_init_nstart=5,
kmeans_init_iter_max=100)
fig.size(5,5)
ElbowPlot(alt.Treg, ndims = 50, reduction = "harmony")
ElbowPlot(alt.Treg, ndims = 50, reduction = "pca")
Harmony 1/10 Harmony 2/10 Harmony 3/10 Harmony 4/10 Harmony 5/10 Harmony 6/10 Harmony 7/10 Harmony 8/10 Harmony 9/10 Harmony 10/10 Warning message: “Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
# dims_use = 1:20
# alt.Treg <- RunUMAP(object=alt.Treg, reduction="harmony", dims=dims_use, verbose=FALSE)
# alt.Treg <- FindNeighbors(object=alt.Treg, reduction="harmony", dims=dims_use, verbose=FALSE)
# alt.Treg <- FindClusters(object=alt.Treg, resolution=1, verbose=FALSE)
# Run UMAP
set.seed(1)
HU <- uwot::umap(alt.Treg@reductions$harmony@cell.embeddings, min_dist = 0.3,
spread = 0.8, ret_extra = 'fgraph', fast_sgd = FALSE)
colnames(HU$embedding) = c('HUMAP1', 'HUMAP2')
rownames(HU$fgraph) = colnames(HU$fgraph) = Cells(alt.Treg)
alt.Treg[['humap']] <- Seurat::CreateDimReducObject(embeddings = HU$embedding,
assay = 'RNA',
key = 'HUMAP_',
global = TRUE)
HU_graph <- Seurat::as.Graph(HU$fgraph)
DefaultAssay(HU_graph) <- DefaultAssay(alt.Treg)
alt.Treg[['humap_fgraph']] <- HU_graph
alt.Treg <- FindClusters(alt.Treg, graph.name = 'humap_fgraph',
resolution = 1, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 58105 Number of edges: 699332 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.7738 Number of communities: 17 Elapsed time: 6 seconds
fig.size(5,5)
# clustercols <- c(colorRampPalette(brewer.pal(8, "Set2"))(13)[1:10], colorRampPalette(brewer.pal(8, "Set1"))(13)[1:9], colorRampPalette(brewer.pal(8, "Set3"))(13)[1:9])
red.use = "humap"
DimPlot(alt.Treg, reduction = red.use, label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) +
theme(text = element_text(size = 20))+
scale_color_tableau("Tableau 20")
fig.size(5,10)
DimPlot(alt.Treg, reduction = red.use, group.by = "donor", label = F, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) +
theme(text = element_text(size = 20))
VlnPlot(alt.Treg, features = c("FOXP3", "IL2RA"), ncol = 1)
# library(presto)
Idents(alt.Treg) <- "seurat_clusters"
dge_wilxocon <- wilcoxauc(alt.Treg, "seurat_clusters")
options(repr.matrix.max.cols=50, repr.matrix.max.rows=100)
dge_wilxocon %>%
group_by(group) %>%
slice_max(n = 25, order_by = logFC)%>%
select(feature, group) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = group, values_from = feature) #%>% write.csv("AMP.reclustering.markers.csv")
| row | 0 | 1 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <int> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
| 1 | ANXA1 | CXCL13 | NKG7 | RPS3A | JUN | NEAT1 | ISG15 | FN1 | STMN1 | CCL5 | AREG | IL7R | IL7R | S100A4 | RTKN2 | LAG3 | IL32 |
| 2 | TNFAIP3 | TSHZ2 | GNLY | RPS13 | FOSB | NKTR | IFIT2 | PRG4 | TUBA1B | GZMK | JUNB | GIMAP7 | FOS | ANXA1 | TIGIT | TNFRSF18 | TIGIT |
| 3 | CD69 | ITM2A | GZMH | RPS12 | FOS | DDX17 | IFIT3 | CLU | HMGB2 | NKG7 | YPEL5 | IKZF1 | LTB | LGALS1 | IKZF2 | SRGN | CTLA4 |
| 4 | LMNA | AC004585.1 | CCL5 | EEF1B2 | IER2 | IGKC | OASL | PLA2G2A | TUBB | GZMA | CD69 | TXNIP | ANXA1 | S100A10 | SELL | CTLA4 | S100A4 |
| 5 | FOS | NR3C1 | CCL4 | RPS5 | BTG2 | MALAT1 | PMAIP1 | HTRA1 | GAPDH | CCL4 | KLF2 | LTB | JUN | GZMA | SESN3 | CXCL13 | FOXP3 |
| 6 | JUN | MAF | FGFBP2 | RPS6 | HSP90AA1 | XIST | IFIT1 | COL3A1 | H2AFZ | CST7 | ZNF331 | NKTR | KLF2 | S100A11 | RGS1 | GAPDH | TNFRSF18 |
| 7 | ZFP36 | RNF19A | GZMA | RPL32 | DNAJB1 | MIAT | MX1 | COL1A2 | HMGN2 | CD74 | FOS | MT-ND2 | DUSP1 | S100A6 | MAL | MAF | RTKN2 |
| 8 | NFKBIA | FKBP5 | CST7 | RPS8 | PPP1R10 | RNF213 | IFI44L | CRTAC1 | MKI67 | HLA-DPB1 | SOCS3 | N4BP2L2 | FOSB | VIM | C12orf57 | TNFRSF4 | HLA-DRB1 |
| 9 | KLF6 | TIGIT | PRF1 | SELL | EGR1 | PCSK7 | IFI6 | MGP | DUT | HLA-DRB1 | BTG2 | MTRNR2L12 | RPS12 | CCL5 | ZNF331 | CD7 | IKZF2 |
| 10 | JUNB | PGM2L1 | GZMB | RPL9 | SAT1 | N4BP2L2 | XAF1 | CST3 | HMGB1 | HLA-DPA1 | JUN | MT-ND1 | RPL36A | EMP3 | JUNB | PTMS | CYTOR |
| 11 | FOSB | SRGN | EFHD2 | RPL5 | GADD45B | MTRNR2L12 | ZC3HAV1 | CFD | CENPF | GZMM | NR4A2 | LUC7L3 | RPS10 | CLIC1 | LTB | S100A4 | RGS1 |
| 12 | CXCR4 | CORO1B | ZEB2 | RPL22 | HSPA1B | POLR2J3.1 | HERC5 | TXNIP | MT2A | GZMH | EEF1B2 | PNISR | TPT1 | SH3BGRL3 | ARID5B | LGALS1 | LGALS1 |
| 13 | RGCC | CD7 | PLEK | RPL30 | PPP1R15A | MACF1 | LY6E | TIMP3 | DEK | APOBEC3G | PABPC1 | MALAT1 | RPLP0 | CD52 | RPL22 | SPOCK2 | TNFRSF4 |
| 14 | DUSP1 | PPP1CC | HOPX | RPS23 | MTRNR2L12 | PNISR | CD69 | LUM | TYMS | HCST | LDHB | MT-ND3 | RPL34 | IL32 | RPS8 | PDCD1 | CD74 |
| 15 | IL7R | THADA | ID2 | RPL21 | DDX3X | SYNE2 | FOS | DCN | PCNA | CCL4L2 | RPS13 | GIMAP4 | KLRB1 | CKLF | BIRC3 | S100A11 | DUSP4 |
| 16 | MYADM | TNFRSF4 | KLRG1 | KLF2 | MALAT1 | ARGLU1 | GADD45B | TIMP1 | TOP2A | CMC1 | RPS3A | DDX17 | EEF1B2 | ANXA2 | IGKC | RBPJ | IL2RA |
| 17 | AC020916.1 | LIMS1 | KLF2 | RPL34 | HEXIM1 | AKNA | MX2 | SPARCL1 | PKM | DUSP2 | RPS8 | PCSK7 | S100A4 | CD99 | RPS6 | MYL6 | TNFRSF1B |
| 18 | DNAJA1 | NMB | RAP1B | CCR7 | NEAT1 | ANKRD44 | IFI44 | FTL | ENO1 | HLA-DRA | RPS12 | PRMT2 | RPSA | HCST | RPL32 | ALOX5AP | CD27 |
| 19 | TSC22D3 | COTL1 | TRGC2 | RPL13 | SRSF7 | ASH1L | ISG20 | GSN | TPI1 | KLRG1 | DNAJA1 | ATM | RPL23A | ACTB | RPL12 | DUSP4 | GBP5 |
| 20 | BTG2 | ZNF331 | C12orf75 | RPL37 | PLCG2 | OGA | SAT1 | APOE | ACTB | CD81 | CD55 | MT-ND5 | RPS18 | TAGLN2 | CD27 | CD4 | CTSC |
| 21 | VIM | PDCD1 | CTSW | RPL39 | INTS6 | MAF | EIF2AK2 | CD63 | SMC4 | CTSW | RPS6 | MT-CYB | RPL3 | PLP2 | NABP1 | LAIR2 | CARD16 |
| 22 | FTH1 | DRAIC | IFNG | RPL35A | IER5 | CFLAR | STAT1 | THBS4 | PFN1 | LYAR | CYCS | AC004687.1 | RPS21 | MYL6 | RPS13 | PRDM1 | TBC1D4 |
| 23 | KLF2 | TNFRSF18 | GZMM | RPL7 | JUNB | MT-ND5 | JUN | MMP3 | H2AFV | LYST | RPS5 | LINC00861 | RPL39 | MYL12A | RPL9 | PKM | LTB |
| 24 | ZFP36L2 | CTLA4 | CX3CR1 | RPL11 | SLC38A2 | IKZF1 | ANXA1 | MT-ND1 | NUSAP1 | IGKC | FOSB | MT-CO3 | LDHB | TIMP1 | LEF1 | TYMP | ARID5B |
| 25 | YPEL5 | IL6ST | CTSC | RPL10A | JUND | MT-CO2 | EPSTI1 | ANXA1 | LGALS1 | SH2D1A | SELL | MTRNR2L8 | RPS19 | CD2 | RPS3A | IL32 | BATF |
p.cells.foxp3 <- FetchData(alt.Treg, vars= c("FOXP3", "seurat_clusters"), assay="RNA", slot = "data")
p.cells.foxp3 <- p.cells.foxp3%>% mutate(FOXP3.e = FOXP3 > 0) %>% group_by(seurat_clusters, FOXP3.e)%>% summarise(n = n())%>% ungroup()
ggplot(p.cells.foxp3, aes(x=seurat_clusters, y=n, fill = FOXP3.e)) + geom_bar(stat = "identity")
Error in FetchData(alt.Treg, vars = c("FOXP3", "seurat_clusters"), assay = "RNA", : object 'alt.Treg' not found
Traceback:
1. FetchData(alt.Treg, vars = c("FOXP3", "seurat_clusters"), assay = "RNA",
. slot = "data")
fig.size(6,6)
FeaturePlot(alt.Treg, reduction = red.use, features = "FOXP3")
alt.Treg$cluster[alt.Treg$seurat_clusters %in% c(7,9)]%>% table
.
T-0: CD4+ IL7R+ memory T-1: CD4+ CD161+ memory
120 53
T-10: CD4+ OX40+NR3C1+ T-11: CD4+ CD146+ memory
524 129
T-12: CD4+ GNLY+ T-2: CD4+ IL7R+CCR5+ memory
3 43
T-3: CD4+ Tfh/Tph T-4: CD4+ naive
231 358
T-5: CD4+ GZMK+ memory T-6: CD4+ memory
34 179
T-7: CD4+ Tph T-8: CD4+ CD25-high Treg
43 2208
T-9: CD4+ CD25-low Treg
1950
#Psudobulk and correlation
amp.clusters <- FetchData(object = alt.Treg,
vars = c(alt.Treg@assays$RNA@var.features, "cluster"),
slot = "data", assay = "RNA")
amp.clusters <- amp.clusters %>% group_by(cluster) %>% summarise_all("mean")
# @assays$RNA@data%>% t()%>% data.frame() %>% mutate(cluster = alt.Treg$cluster)%>%
# group_by(cluster)%>% summarise_all("mean")
# amp.c.names <- amp.clusters$cluster
new.clusters <- FetchData(object = alt.Treg,
vars = c(alt.Treg@assays$RNA@var.features, "seurat_clusters"),
slot = "data", assay = "RNA")
new.clusters <- new.clusters %>% group_by(seurat_clusters) %>% summarise_all("mean")
# new.clusters <- alt.Treg@assays$RNA@data%>% t()%>% data.frame() %>% mutate(s.cluster = alt.Treg$seurat_clusters)%>%
# group_by(s.cluster)%>% summarise_all("mean")
# new.c.names <- new.clusters$s.cluster
amp.clusters <- data.frame(amp.clusters)
row.names(amp.clusters) <- amp.clusters$cluster
amp.clusters$cluster <- NULL
new.clusters <- data.frame(new.clusters)
row.names(new.clusters) <- new.clusters$seurat_clusters
new.clusters$seurat_clusters <- NULL
cluster.cor <- cor(t(scale(amp.clusters)), t(scale(new.clusters)))
cluster.cor <- data.frame(cluster.cor)
# library(pheatmap)
colorBreaks_cor = seq(-0.8,0.8,length=1000)
palette_cor <- colorRampPalette(c("#D4AC0D", "white", "#800080"))(n = length(colorBreaks_cor))
pheatmap(cluster.cor, scale = "none",
cluster_cols = F)
pheatmap(cluster.cor, scale = "none", breaks = colorBreaks_cor, color = palette_cor,
cluster_cols = T)
Treg.clusters <- c(7,9)
alt.Treg <- subset(alt.Treg, idents = Treg.clusters)
alt.Treg
An object of class Seurat 33596 features across 5875 samples within 2 assays Active assay: RNA (33538 features, 2000 variable features) 1 other assay present: ADT 4 dimensional reductions calculated: pca, umap, harmony, humap
fig.size(5, 10)
FeaturePlot(alt.Treg, features = c("IL7R", "CD127/IL-7R-alpha-prot"), order = T)
VlnPlot(alt.Treg, features = c("IL7R", "CD127/IL-7R-alpha-prot"))
Warning message: “Could not find CD127/IL-7R-alpha-prot in the default search locations, found in ADT assay instead” Warning message: “Could not find CD127/IL-7R-alpha-prot in the default search locations, found in ADT assay instead”
alt.Treg <- alt.Treg %>%
NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
ScaleData()%>%
RunPCA(verbose = F)
alt.Treg <- NormalizeData(alt.Treg, normalization.method = "CLR", margin = 2, assay = "ADT")
alt.Treg <- ScaleData(alt.Treg, assay = "ADT")
Centering and scaling data matrix Normalizing across cells Centering and scaling data matrix
alt.Treg <- RunHarmony(alt.Treg, group.by.vars="donor", reduction = "pca", theta =0, plot_convergence = 2)
fig.size(5,5)
ElbowPlot(alt.Treg, ndims = 50, reduction = "harmony")
ElbowPlot(alt.Treg, ndims = 50, reduction = "pca")
Harmony 1/10 Harmony 2/10 Harmony 3/10 Harmony 4/10 Harmony 5/10 Harmony 6/10 Harmony 7/10 Harmony 8/10 Harmony 9/10 Harmony 10/10 Warning message: “Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
# dims_use = 1:20
# alt.Treg <- RunUMAP(object=alt.Treg, reduction="harmony", dims=dims_use, verbose=FALSE)
# alt.Treg <- FindNeighbors(object=alt.Treg, reduction="harmony", dims=dims_use, verbose=FALSE)
# alt.Treg <- FindClusters(object=alt.Treg, resolution=1, verbose=FALSE)
set.seed(1)
HU <- uwot::umap(alt.Treg@reductions$harmony@cell.embeddings, min_dist = 0.3,
spread = 0.8, ret_extra = 'fgraph', fast_sgd = FALSE)
colnames(HU$embedding) = c('HUMAP1', 'HUMAP2')
rownames(HU$fgraph) = colnames(HU$fgraph) = Cells(alt.Treg)
alt.Treg[['humap']] <- Seurat::CreateDimReducObject(
embeddings = HU$embedding,
assay = 'RNA',
key = 'HUMAP_',
global = TRUE
)
HU_graph <- Seurat::as.Graph(HU$fgraph)
DefaultAssay(HU_graph) <- DefaultAssay(alt.Treg)
alt.Treg[['humap_fgraph']] <- HU_graph
alt.Treg <- FindClusters(alt.Treg, graph.name = 'humap_fgraph',
resolution = 0.8, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 5875 Number of edges: 70378 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.7496 Number of communities: 13 Elapsed time: 0 seconds
fig.size(6,6)
DimPlot(alt.Treg, reduction = red.use, label = T, repel = T, shuffle = T, label.size = 6) +
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
fig.size(5,5)
# clustercols <- c(colorRampPalette(brewer.pal(8, "Set2"))(13)[1:10], colorRampPalette(brewer.pal(8, "Set1"))(13)[1:9], colorRampPalette(brewer.pal(8, "Set3"))(13)[1:9])
red.use = "humap"
DimPlot(alt.Treg, reduction = red.use, group.by = "sex", label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6)
fig.size(5,10)
DimPlot(alt.Treg, reduction = red.use, group.by = "donor", label = F, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) +
theme(text = element_text(size = 20))
fig.size(5,20)
FeaturePlot(alt.Treg, reduction = red.use, features = c("FOXP3", "CXCL13", "CD8A", "AREG"), order = F, ncol=4)
FeaturePlot(alt.Treg, reduction = red.use, features = c("FOXP3", "CXCL13", "CD8A", "AREG"), order = T, ncol=4)
# fig.size(5,30)
# FeaturePlot(alt.Treg, features = c("FOXP3"), pt.size = 0.1, order = T, split.by = "seurat_clusters")
fig.size(10,15)
FetchData(alt.Treg, vars = c("FOXP3","CD127/IL-7R-alpha-prot", "seurat_clusters"), assay = "RNA", slot = "data") %>%
rename("CD127.prot" = "adt_CD127/IL-7R-alpha-prot") -> pt.dat
pt.dat%>%
ggplot(aes(x=FOXP3, y = CD127.prot, col = seurat_clusters)) + geom_point() +facet_wrap(~seurat_clusters) +ylim(0,2)
table(alt.Treg$seurat_clusters)
0 1 2 3 4 5 6 7 8 9 10 11 12 895 880 790 729 701 625 476 241 178 129 102 93 36
fig.size(8,15)
library(ggridges)
pt.dat %>%
ggplot(aes(x=CD127.prot, y = seurat_clusters)) + geom_density_ridges2(alpha = 0.5) + scale_fill_tableau("Tableau 20")
VlnPlot(alt.Treg, features = c("FOXP3"))
alt.Treg$seurat_clusters%>% table
alt.Treg$cluster%>% table
alt.Treg$cluster[alt.Treg$seurat_clusters %in% c(0,3,5,8)]%>% table
. 0 1 2 3 4 5 6 7 8 9 10 11 12 895 880 790 729 701 625 476 241 178 129 102 93 36
.
T-0: CD4+ IL7R+ memory T-1: CD4+ CD161+ memory
120 53
T-10: CD4+ OX40+NR3C1+ T-11: CD4+ CD146+ memory
524 129
T-12: CD4+ GNLY+ T-2: CD4+ IL7R+CCR5+ memory
3 43
T-3: CD4+ Tfh/Tph T-4: CD4+ naive
231 358
T-5: CD4+ GZMK+ memory T-6: CD4+ memory
34 179
T-7: CD4+ Tph T-8: CD4+ CD25-high Treg
43 2208
T-9: CD4+ CD25-low Treg
1950
.
T-0: CD4+ IL7R+ memory T-1: CD4+ CD161+ memory
58 27
T-10: CD4+ OX40+NR3C1+ T-11: CD4+ CD146+ memory
308 67
T-12: CD4+ GNLY+ T-2: CD4+ IL7R+CCR5+ memory
1 18
T-3: CD4+ Tfh/Tph T-4: CD4+ naive
167 239
T-5: CD4+ GZMK+ memory T-6: CD4+ memory
13 67
T-7: CD4+ Tph T-8: CD4+ CD25-high Treg
5 209
T-9: CD4+ CD25-low Treg
1248
Idents(alt.Treg) <- "seurat_clusters"
dge_wilxocon <- wilcoxauc(alt.Treg, "seurat_clusters")
options(repr.matrix.max.cols=50, repr.matrix.max.rows=100)
dge_wilxocon %>%
group_by(group) %>%
slice_max(n = 20, order_by = logFC)%>%
select(feature, group) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = group, values_from = feature) #%>% write.csv("AMP.reclustering.markers.csv")
| row | 0 | 1 | 10 | 11 | 12 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <int> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> |
| 1 | JUNB | IL32 | KLF6 | ISG15 | PRG4 | TNFRSF18 | FOS | KLF2 | NMB | GIMAP7 | KLRB1 | RPS4Y1 | CCL5 |
| 2 | ZNF331 | LGALS1 | JUN | IFIT2 | FN1 | TNFRSF4 | KLF2 | TNFAIP3 | ZNF331 | IGKC | IGKC | KLF2 | GZMK |
| 3 | CD69 | S100A4 | TNFAIP3 | PMAIP1 | PLA2G2A | CTSC | RPS3A | MYADM | RNF19A | RPS12 | AC004585.1 | JUN | GZMA |
| 4 | NR4A2 | RNF213 | YPEL5 | OASL | CLU | CTLA4 | RPL32 | TSC22D3 | ITM2A | RPS29 | ALOX5AP | IER2 | NKG7 |
| 5 | YPEL5 | HLA-DRB1 | AC020916.1 | ZC3HAV1 | CRTAC1 | IL32 | RPS20 | JUN | KLRB1 | RPLP2 | FKBP5 | GIMAP7 | CST7 |
| 6 | FOSB | CD74 | ZNF331 | IFIT3 | MGP | LMNA | EEF1B2 | IRF1 | NR3C1 | RPS21 | CD7 | HSP90AA1 | ANXA1 |
| 7 | CXCR4 | S100A6 | SAT1 | MX1 | HTRA1 | DUSP4 | RPS12 | KLF6 | JUNB | RPL39 | IGLC2 | FOS | JUN |
| 8 | FOS | CTSC | FOSB | RGS1 | KLF6 | PHLDA1 | RPS13 | DNAJB1 | SRGN | RPL13 | PTPN7 | GADD45B | FOSB |
| 9 | JUN | CYTOR | FOS | IFIT1 | TIMP1 | GAPDH | RPS6 | FTH1 | TWIST1 | PNISR | SESN3 | CD52 | DUSP2 |
| 10 | BTG2 | C15orf53 | JUND | HERC5 | CFD | BATF | RPS18 | CREM | RGS2 | LUC7L3 | PTPRC | DNAJB1 | AHNAK |
| 11 | ZFP36L2 | TNFRSF18 | VPS37B | IFI6 | COL3A1 | CD74 | RPL9 | CD55 | AREG | RPL36 | NR3C1 | TXNIP | CCL4 |
| 12 | AREG | FLNA | SLC2A3 | ISG20 | LUM | TYMP | RPLP2 | LMNA | MAGEH1 | GIMAP4 | TNFRSF4 | HSPA1B | TNFAIP3 |
| 13 | KLF2 | S100A10 | NR4A2 | LY6E | COL1A2 | ENO1 | RPL13 | RGS1 | ICA1 | RPS18 | ARGLU1 | BTG2 | NR4A2 |
| 14 | UBE2S | GBP5 | LMNA | SAT1 | RGS1 | SRGN | RPL30 | CD69 | CD7 | RPL32 | CORO1B | RIPOR2 | ZFP36L2 |
| 15 | DNAJA1 | EMP3 | RGS1 | SRSF3 | DCN | LGALS1 | RPS8 | DNAJA1 | GEM | TXNIP | PNISR | RPL36A | CD69 |
| 16 | CYCS | CYBA | CREM | DNAJA1 | TIMP3 | TNFRSF1B | RPS5 | AC058791.1 | IGFL2 | MT-ND2 | IGHG3 | RPL17 | ID2 |
| 17 | BTG1 | SYNE2 | CXCR4 | XAF1 | BATF | HLA-DRB1 | RPL3 | TENT5C | AHI1 | RPL38 | N4BP2L2 | AES | GZMM |
| 18 | DUSP1 | HLA-DPB1 | MT-CO1 | GADD45B | HLA-DRA | S100A4 | RPL22 | DUSP1 | MAL | SELL | IGHM | RPS18 | CREM |
| 19 | DUSP2 | NKTR | JUNB | EPSTI1 | CD63 | CD2 | RPS14 | CD52 | DUSP4 | RPS27 | ITM2A | GIMAP1 | IRF1 |
| 20 | CCR7 | HLA-DPA1 | CLK1 | ID2 | HLA-DPB1 | PKM | RPL27A | JUNB | DUSP2 | RPL34 | ICA1 | BIN2 | HLA-DRB1 |
#Psudobulk
pb <- FetchData(object = alt.Treg,
vars = c("FOXP3", "IL2RA", "AREG", "IL7R", "CD127/IL-7R-alpha-prot", "seurat_clusters"),
slot = "data", assay = "RNA")
pb <- pb %>% group_by(seurat_clusters) %>% summarise_all("mean") %>% rename("CD127.prot" = "adt_CD127/IL-7R-alpha-prot")
pb%>% ggplot(aes(x = FOXP3, y = IL7R, label = seurat_clusters, color = AREG)) + geom_point() + geom_smooth(method = "lm", se = FALSE) + geom_label()
Warning message: “Could not find CD127/IL-7R-alpha-prot in the default search locations, found in ADT assay instead” `geom_smooth()` using formula = 'y ~ x' Warning message: “The following aesthetics were dropped during statistical transformation: label, colour ℹ This can happen when ggplot fails to infer the correct grouping structure in the data. ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable into a factor?”
# 12 - fibroblasts ?
# 9 - GZM signature CD8
# 0,3,5- low FOXP3
# 8 - low FOXP3 - male unique (mix donors)
alt.Treg <- subset(alt.Treg, idents = c(0,3,5,8,9,12), invert = T)
alt.Treg
An object of class Seurat 33596 features across 3283 samples within 2 assays Active assay: RNA (33538 features, 2000 variable features) 1 other assay present: ADT 4 dimensional reductions calculated: pca, umap, harmony, humap
alt.Treg <- alt.Treg %>%
NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
ScaleData()%>%
RunPCA(verbose = F)
alt.Treg <- NormalizeData(alt.Treg, normalization.method = "CLR", margin = 2, assay = "ADT")
alt.Treg <- ScaleData(alt.Treg, assay = "ADT")
Centering and scaling data matrix Normalizing across cells Centering and scaling data matrix
alt.Treg <- RunHarmony(alt.Treg, group.by.vars="donor", reduction = "pca", theta = 0, plot_convergence = 2)
fig.size(5,5)
ElbowPlot(alt.Treg, ndims = 50, reduction = "harmony")
ElbowPlot(alt.Treg, ndims = 50, reduction = "pca")
set.seed(1)
HU <- uwot::umap(alt.Treg@reductions$harmony@cell.embeddings, min_dist = 0.8,
spread = 1, ret_extra = 'fgraph', fast_sgd = FALSE)
colnames(HU$embedding) = c('HUMAP1', 'HUMAP2')
rownames(HU$fgraph) = colnames(HU$fgraph) = Cells(alt.Treg)
alt.Treg[['humap']] <- Seurat::CreateDimReducObject(
embeddings = HU$embedding,
assay = 'RNA',
key = 'HUMAP_',
global = TRUE
)
HU_graph <- Seurat::as.Graph(HU$fgraph)
DefaultAssay(HU_graph) <- DefaultAssay(alt.Treg)
alt.Treg[['humap_fgraph']] <- HU_graph
alt.Treg <- FindClusters(alt.Treg, graph.name = 'humap_fgraph',
resolution = 0.5, verbose = TRUE)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 3283 Number of edges: 38619 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.8208 Number of communities: 11 Elapsed time: 0 seconds
fig.size(6,6)
DimPlot(alt.Treg, reduction = red.use, label = T, repel = T, shuffle = T, label.size = 6) +
theme(text = element_text(size = 20)) + scale_color_tableau("Tableau 20")
fig.size(4,15)
FeaturePlot(alt.Treg, reduction = red.use, features = c("FOXP3", "AREG", "IL2RA"), order = F, ncol=3)
alt.Treg
An object of class Seurat 33596 features across 3283 samples within 2 assays Active assay: RNA (33538 features, 2000 variable features) 1 other assay present: ADT 4 dimensional reductions calculated: pca, umap, harmony, humap
saveRDS(alt.Treg, "/data/brennerlab/Shani/projects/Treg/analysis/AMP2_RA_tissue/AMP2_tissue_Treg_alternative.rds")
gain.alt <- setdiff(colnames(alt.Treg), colnames(treg.amp))
loss.alt <- setdiff(colnames(treg.amp), colnames(alt.Treg))
length(gain.alt)
length(loss.alt)
add2amp <- amp@meta.data %>% mutate(add2amp = if_else(condition = grepl("T-[89]", cluster), "overlap Treg", "other"))
add2amp[gain.alt, "add2amp"] <- "alternative.Treg.only"
add2amp[loss.alt, "add2amp"] <- "amp.Treg.only"
amp <- AddMetaData(amp, add2amp$add2amp, "Treg.classification")
amp$Treg.classification <- factor(amp$Treg.classification, levels = c("other", "overlap Treg", "alternative.Treg.only", "amp.Treg.only"))
DimPlot(amp, group.by = "Treg.classification", order = T, cols = c("grey", "#C48FFE", "#22D396", "#E7A937"), pt.size = 0.5)
fig.size(5,10)
DimPlot(amp, group.by = "Treg.classification", order = T, cols = c("grey", "#C48FFE", "#22D396", "#E7A937"), pt.size = 0.5, split.by = "Treg.classification")
Treg.pop.DEG <- wilcoxauc(amp, "Treg.classification", groups_use = c("amp.Treg.only", "alternative.Treg.only"))
options(repr.matrix.max.cols=50, repr.matrix.max.rows=100)
Treg.pop.DEG %>%
group_by(group) %>%
slice_max(n = 15, order_by = logFC)%>%
select(feature, group) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = group, values_from = feature)
Idents(amp) <- "Treg.classification"
DotPlot(amp, features = c("FOXP3", "IL2RA", "AREG", "IL7R", "CD127/IL-7R-alpha-prot"), scale =F)
#Psudobulk and correlation
new.clusters <- FetchData(object = alt.Treg,
vars = c(alt.Treg@assays$RNA@var.features, "seurat_clusters"),
slot = "data", assay = "RNA")
new.clusters <- new.clusters %>% group_by(seurat_clusters) %>% summarise_all("mean")
new.clusters <- data.frame(new.clusters)
row.names(new.clusters) <- new.clusters$seurat_clusters
new.clusters$seurat_clusters <- NULL
cluster.cor <- cor(t(scale(amp.clusters)), t(scale(new.clusters)))
cluster.cor <- data.frame(cluster.cor)
colorBreaks_cor = seq(-0.8,0.8,length=1000)
palette_cor <- colorRampPalette(c("#D4AC0D", "white", "#800080"))(n = length(colorBreaks_cor))
fig.size(5, 10)
pheatmap(cluster.cor, scale = "none",
cluster_cols = F)
pheatmap(cluster.cor, scale = "none", breaks = colorBreaks_cor, color = palette_cor,
cluster_cols = T)
alt.Treg <- FindSubCluster(alt.Treg, cluster = 3, resolution = 0.3, graph.name = "RNA_snn")
DimPlot(alt.Treg, group.by = "sub.cluster", label = T, repel = T, shuffle = T, pt.size = 0.5, label.size = 6) +
theme(text = element_text(size = 20)) + scale_color_tableau(palette = "Tableau 20")
VlnPlot(alt.Treg, features = c("FOXP3"), group.by = "sub.cluster") + scale_fill_tableau(palette = "Tableau 20")
alt.Treg@meta.data %>% filter(seurat_clusters %in% c(3,5,7,14)) %>% summarise(n())